xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8067731824783c442a18eb53d72b00c7987f7928)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162 
163       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
164       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
165         if (sub_schurs->is_hermitian) {
166           PetscInt j;
167           for (j=0;j<subset_size;j++) {
168             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169           }
170           for (j=0;j<subset_size;j++) {
171             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
172           }
173         } else {
174           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
175           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176         }
177       } else {
178         S = Sarray + cumarray;
179         St = Starray + cumarray;
180       }
181 
182       /* Threshold: this is an heuristic for edges */
183       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
184       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
185 
186       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
187         PetscBLASInt B_itype = 1;
188         PetscBLASInt B_IL, B_IU;
189         PetscReal    eps = -1.0; /* dlamch? */
190         PetscInt     nmin_s;
191 
192         /* ask for eigenvalues larger than thresh */
193         if (pcbddc->dbg_flag) {
194           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
195         }
196         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
197 #if defined(PETSC_USE_COMPLEX)
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
199 #else
200         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
201 #endif
202         ierr = PetscFPTrapPop();CHKERRQ(ierr);
203         if (B_ierr) {
204           if (B_ierr < 0 ) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
206           } else if (B_ierr <= B_N) {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
208           } else {
209             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
210           }
211         }
212 
213         if (B_neigs > nmax) {
214           if (pcbddc->dbg_flag) {
215             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
216           }
217           eigs_start = B_neigs -nmax;
218           B_neigs = nmax;
219         }
220 
221         nmin_s = PetscMin(nmin,B_N);
222         if (B_neigs < nmin_s) {
223           PetscBLASInt B_neigs2;
224 
225           B_IU = B_N - B_neigs;
226           B_IL = B_N - nmin_s + 1;
227           if (pcbddc->dbg_flag) {
228             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
229           }
230           if (sub_schurs->is_hermitian) {
231             PetscInt j;
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235             for (j=0;j<subset_size;j++) {
236               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
237             }
238           } else {
239             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
240             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
241           }
242           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
243 #if defined(PETSC_USE_COMPLEX)
244           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
245 #else
246           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
247 #endif
248           ierr = PetscFPTrapPop();CHKERRQ(ierr);
249           B_neigs += B_neigs2;
250         }
251         if (B_ierr) {
252           if (B_ierr < 0 ) {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
254           } else if (B_ierr <= B_N) {
255             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
256           } else {
257             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
258           }
259         }
260         if (pcbddc->dbg_flag) {
261           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
262           for (j=0;j<B_neigs;j++) {
263             if (eigs[j] == 0.0) {
264               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
265             } else {
266               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
267             }
268           }
269         }
270       } else {
271           /* TODO */
272       }
273 
274       maxneigs = PetscMax(B_neigs,maxneigs);
275       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
276       if (B_neigs) {
277         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279         if (pcbddc->dbg_flag > 1) {
280           PetscInt ii;
281           for (ii=0;ii<B_neigs;ii++) {
282             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283             for (j=0;j<B_N;j++) {
284               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
285             }
286           }
287         }
288 #if 0
289         for (j=0;j<B_neigs;j++) {
290           PetscBLASInt Blas_N,Blas_one = 1.0;
291           PetscScalar norm;
292           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
293           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
294                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
305         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
306         cum++;
307       }
308       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
309     }
310     /* shift for next computation */
311     cumarray += subset_size*subset_size;
312   }
313   if (pcbddc->dbg_flag) {
314     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
315   }
316 
317   if (mss) {
318     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
319     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
320     /* destroy matrices (junk) */
321     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
322     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
323   }
324   if (allocated_S_St) {
325     ierr = PetscFree2(S,St);CHKERRQ(ierr);
326   }
327   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
328 #if defined(PETSC_USE_COMPLEX)
329   ierr = PetscFree(rwork);CHKERRQ(ierr);
330 #endif
331   if (pcbddc->dbg_flag) {
332     PetscInt maxneigs_r;
333     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
334     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCSetUpSolvers"
341 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
342 {
343   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
344   PetscScalar    *coarse_submat_vals;
345   PetscErrorCode ierr;
346 
347   PetscFunctionBegin;
348   /* Setup local scatters R_to_B and (optionally) R_to_D */
349   /* PCBDDCSetUpLocalWorkVectors should be called first! */
350   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
351 
352   /* Setup local neumann solver ksp_R */
353   /* PCBDDCSetUpLocalScatters should be called first! */
354   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
355 
356   /* Change global null space passed in by the user if change of basis has been requested */
357   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
358     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
359   }
360 
361   /*
362      Setup local correction and local part of coarse basis.
363      Gives back the dense local part of the coarse matrix in column major ordering
364   */
365   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* Compute total number of coarse nodes and setup coarse solver */
368   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
369 
370   /* free */
371   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "PCBDDCResetCustomization"
377 PetscErrorCode PCBDDCResetCustomization(PC pc)
378 {
379   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
384   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
385   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
388   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
389   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
390   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
391   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
393   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
394   PetscFunctionReturn(0);
395 }
396 
397 #undef __FUNCT__
398 #define __FUNCT__ "PCBDDCResetTopography"
399 PetscErrorCode PCBDDCResetTopography(PC pc)
400 {
401   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
408   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
409   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "PCBDDCResetSolvers"
415 PetscErrorCode PCBDDCResetSolvers(PC pc)
416 {
417   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
418   PetscScalar    *array;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
423   if (pcbddc->coarse_phi_B) {
424     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
425     ierr = PetscFree(array);CHKERRQ(ierr);
426   }
427   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
430   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
432   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
434   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
436   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
437   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
440   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
443   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
446   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
447   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
448   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
449   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
455 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
456 {
457   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
458   PC_IS          *pcis = (PC_IS*)pc->data;
459   VecType        impVecType;
460   PetscInt       n_constraints,n_R,old_size;
461   PetscErrorCode ierr;
462 
463   PetscFunctionBegin;
464   if (!pcbddc->ConstraintMatrix) {
465     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
466   }
467   /* get sizes */
468   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
469   n_R = pcis->n-pcbddc->n_vertices;
470   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
471   /* local work vectors (try to avoid unneeded work)*/
472   /* R nodes */
473   old_size = -1;
474   if (pcbddc->vec1_R) {
475     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
476   }
477   if (n_R != old_size) {
478     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
480     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
482     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
483     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
484   }
485   /* local primal dofs */
486   old_size = -1;
487   if (pcbddc->vec1_P) {
488     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
489   }
490   if (pcbddc->local_primal_size != old_size) {
491     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
492     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
493     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
494     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
495   }
496   /* local explicit constraints */
497   old_size = -1;
498   if (pcbddc->vec1_C) {
499     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
500   }
501   if (n_constraints && n_constraints != old_size) {
502     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
503     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
504     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
505     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "PCBDDCSetUpCorrection"
512 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
513 {
514   PetscErrorCode         ierr;
515   /* pointers to pcis and pcbddc */
516   PC_IS*                 pcis = (PC_IS*)pc->data;
517   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
518   /* submatrices of local problem */
519   Mat                    A_RV,A_VR,A_VV,local_auxmat2_R;
520   /* submatrices of local coarse problem */
521   Mat                    S_VV,S_CV,S_VC,S_CC;
522   /* working matrices */
523   Mat                    C_CR;
524   /* additional working stuff */
525   PC                     pc_R;
526   Mat                    F;
527   PetscBool              isLU,isCHOL,isILU;
528 
529   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
530   PetscScalar            *work;
531   PetscInt               *idx_V_B;
532   PetscInt               n,n_vertices,n_constraints;
533   PetscInt               i,n_R,n_D,n_B;
534   PetscBool              unsymmetric_check;
535   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
536   MatType                impMatType;
537   /* some shortcuts to scalars */
538   PetscScalar            one=1.0,m_one=-1.0;
539 
540   PetscFunctionBegin;
541   n_vertices = pcbddc->n_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B;
545   n_D = pcis->n - n_B;
546   n_R = pcis->n - n_vertices;
547 
548   /* Set types for local objects needed by BDDC precondtioner */
549   impMatType = MATSEQDENSE;
550 
551   /* vertices in boundary numbering */
552   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
553   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
554   if (i != n_vertices) {
555     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
556   }
557 
558   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
559   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
560   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
561   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
562   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
563   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
564   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
565   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
566   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
567   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
568 
569   unsymmetric_check = PETSC_FALSE;
570   /* allocate workspace */
571   n = 0;
572   if (n_constraints) {
573     n += n_R*n_constraints;
574   }
575   if (n_vertices) {
576     n = PetscMax(2*n_R*n_vertices,n);
577     n = PetscMax((n_R+n_B)*n_vertices,n);
578   }
579   if (!pcbddc->issym) {
580     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
581     unsymmetric_check = PETSC_TRUE;
582   }
583   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
584 
585   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
586   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
588   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
590   if (isLU || isILU || isCHOL) {
591     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
592   } else {
593     F = NULL;
594   }
595 
596   /* Precompute stuffs needed for preprocessing and application of BDDC*/
597   if (n_constraints) {
598     Mat         M1,M2,M3;
599     Mat         auxmat;
600     IS          is_aux;
601     PetscScalar *array,*array2;
602 
603     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
604     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
605 
606     /* Extract constraints on R nodes: C_{CR}  */
607     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
608     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
609     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
610 
611     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
612     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
613     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
614     for (i=0;i<n_constraints;i++) {
615       const PetscScalar *row_cmat_values;
616       const PetscInt    *row_cmat_indices;
617       PetscInt          size_of_constraint,j;
618 
619       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
620       for (j=0;j<size_of_constraint;j++) {
621         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
622       }
623       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
624     }
625     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
626     if (F) {
627       Mat B;
628 
629       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
630       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
631       ierr = MatDestroy(&B);CHKERRQ(ierr);
632     } else {
633       PetscScalar *marr;
634 
635       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
636       for (i=0;i<n_constraints;i++) {
637         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
638         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
639         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
640         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
641         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
642       }
643       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
644     }
645     if (!pcbddc->switch_static) {
646       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
648       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
649       for (i=0;i<n_constraints;i++) {
650         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
651         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
652         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
653         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
654         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
655         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
656       }
657       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
658       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
659       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
660     } else {
661       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
662       pcbddc->local_auxmat2 = local_auxmat2_R;
663       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
664     }
665     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
666     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
667     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
668     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
669     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
670     if (isCHOL) {
671       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
672     } else {
673       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
674     }
675     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
676     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
677     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
678     ierr = MatDestroy(&M2);CHKERRQ(ierr);
679     ierr = MatDestroy(&M3);CHKERRQ(ierr);
680     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
681     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
682     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
683     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
684     ierr = MatDestroy(&M1);CHKERRQ(ierr);
685   }
686   /* Get submatrices from subdomain matrix */
687   if (n_vertices) {
688     Mat       newmat;
689     IS        is_aux;
690     PetscInt  ibs,mbs;
691     PetscBool issbaij;
692 
693     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
694     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
695     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
696     if (ibs != mbs) { /* need to convert to SEQAIJ */
697       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
698       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
699       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
700       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
701       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
702       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
703     } else {
704       /* this is safe */
705       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
706       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
707       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
708       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
709         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
710         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
711         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
712         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
713         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
714       } else {
715         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
716         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
717       }
718       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
719       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
720     }
721     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
722   }
723 
724   /* Matrix of coarse basis functions (local) */
725   if (pcbddc->coarse_phi_B) {
726     PetscInt on_B,on_primal,on_D=n_D;
727     if (pcbddc->coarse_phi_D) {
728       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
729     }
730     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
731     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
732       PetscScalar *marray;
733 
734       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
735       ierr = PetscFree(marray);CHKERRQ(ierr);
736       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
737       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
738       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
739       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
740     }
741   }
742 
743   if (!pcbddc->coarse_phi_B) {
744     PetscScalar *marray;
745 
746     n = n_B*pcbddc->local_primal_size;
747     if (pcbddc->switch_static || pcbddc->dbg_flag) {
748       n += n_D*pcbddc->local_primal_size;
749     }
750     if (!pcbddc->issym) {
751       n *= 2;
752     }
753     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
754     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
755     n = n_B*pcbddc->local_primal_size;
756     if (pcbddc->switch_static || pcbddc->dbg_flag) {
757       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
758       n += n_D*pcbddc->local_primal_size;
759     }
760     if (!pcbddc->issym) {
761       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
762       if (pcbddc->switch_static || pcbddc->dbg_flag) {
763         n = n_B*pcbddc->local_primal_size;
764         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
765       }
766     } else {
767       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
768       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
769       if (pcbddc->switch_static || pcbddc->dbg_flag) {
770         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
771         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
772       }
773     }
774   }
775   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
776   /* vertices */
777   if (n_vertices) {
778 
779     if (n_R) {
780       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
781       PetscBLASInt B_N,B_one = 1;
782       PetscScalar  *x,*y;
783 
784       ierr = PetscMemzero(work,n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
785       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
786       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
787       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
788       if (F) {
789         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
790       } else {
791         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
792         for (i=0;i<n_vertices;i++) {
793           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
794           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
795           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
796           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
797           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
798         }
799         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
800       }
801       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
802       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
803       if (n_constraints) {
804         Mat B;
805 
806         for (i=0;i<n_vertices;i++) {
807           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
808           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
809           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
810           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
812           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
813         }
814         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
815         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
816         ierr = MatDestroy(&B);CHKERRQ(ierr);
817         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
818         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
819         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
820         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
821         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
822         ierr = MatDestroy(&B);CHKERRQ(ierr);
823       }
824       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
825       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
826       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
827       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
828       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
829       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
830       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
831       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
832       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
833       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
834     } else {
835       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
836     }
837     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
838     /* coarse basis functions */
839     for (i=0;i<n_vertices;i++) {
840       PetscScalar *y;
841 
842       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
843       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
844       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
845       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
846       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
847       y[n_B*i+idx_V_B[i]] = 1.0;
848       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
849       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
850 
851       if (pcbddc->switch_static || pcbddc->dbg_flag) {
852         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
853         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
854         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
857         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
858       }
859       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
860     }
861   }
862 
863   if (n_constraints) {
864     Mat B;
865 
866     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
867     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
868     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
869     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
870     if (n_vertices) {
871       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
872         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
873       } else {
874         Mat S_VCt;
875 
876         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
877         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
878         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
879       }
880     }
881     ierr = MatDestroy(&B);CHKERRQ(ierr);
882     /* coarse basis functions */
883     for (i=0;i<n_constraints;i++) {
884       PetscScalar *y;
885 
886       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
887       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
888       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
889       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
892       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
893       if (pcbddc->switch_static || pcbddc->dbg_flag) {
894         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
895         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
896         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
899         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
900       }
901       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
902     }
903   }
904   if (n_constraints) {
905     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
906   }
907 
908   /* compute other basis functions for non-symmetric problems */
909   if (!pcbddc->issym) {
910 
911     if (n_constraints) {
912       Mat S_CCT,B_C;
913 
914       /* this is a lazy thing */
915       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
916       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
917       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
918       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
919       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
920       if (n_vertices) {
921         Mat B_V,S_VCT;
922 
923         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
924         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
925         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
926         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
927         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
928       }
929       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
930     } else { /* if there are no constraints, reset work */
931       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
932     }
933     if (n_vertices && n_R) {
934       Mat          A_VRT;
935       PetscScalar  *marray;
936       PetscBLASInt B_N,B_one = 1;
937 
938       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
939       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
940       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
941       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
942       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
943       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
944       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
945     }
946 
947     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
948       for (i=0;i<pcbddc->local_primal_size;i++) {
949         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
950         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
951         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
952         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
953         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
954       }
955     } else {
956       for (i=0;i<pcbddc->local_primal_size;i++) {
957         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
958         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
959         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
960         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
961         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
962       }
963     }
964     /* coarse basis functions */
965     for (i=0;i<pcbddc->local_primal_size;i++) {
966       PetscScalar *y;
967 
968       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
969       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
970       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
971       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973       if (i<n_vertices) {
974         y[n_B*i+idx_V_B[i]] = 1.0;
975       }
976       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
977       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
978 
979       if (pcbddc->switch_static || pcbddc->dbg_flag) {
980         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
981         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
982         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
985         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
986       }
987       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
988     }
989   }
990   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
991   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
992   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
993   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
994   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
995   /* Checking coarse_sub_mat and coarse basis functios */
996   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
997   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
998   if (pcbddc->dbg_flag) {
999     Mat         coarse_sub_mat;
1000     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1001     Mat         coarse_phi_D,coarse_phi_B;
1002     Mat         coarse_psi_D,coarse_psi_B;
1003     Mat         A_II,A_BB,A_IB,A_BI;
1004     Mat         C_B,CPHI;
1005     IS          is_dummy;
1006     Vec         mones;
1007     MatType     checkmattype=MATSEQAIJ;
1008     PetscReal   real_value;
1009 
1010     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1011     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1012     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1013     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1014     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1015     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1016     if (unsymmetric_check) {
1017       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1018       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1019     }
1020     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1021 
1022     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1023     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1024     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1025     if (unsymmetric_check) {
1026       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1027       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1028       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1029       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1030       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1031       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1032       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1033       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1034       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1035       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1036       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1037       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1038     } else {
1039       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1040       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1041       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1042       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1043       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1044       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1045       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1046       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1047     }
1048     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1049     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1050     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1051     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1052     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1053     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1054     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1055     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1056 
1057     /* check constraints */
1058     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1059     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1060     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1061     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1062     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1063     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1064     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1065     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1066     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1067     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1068     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1069     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1070 
1071     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1072     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1073     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1074     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1075     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1076     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1077     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1078     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1079     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1080     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1081     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1082     if (unsymmetric_check) {
1083       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1084       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1085     }
1086     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1087   }
1088   /* free memory */
1089   ierr = PetscFree(work);CHKERRQ(ierr);
1090   if (n_vertices) {
1091     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1092   }
1093   if (n_constraints) {
1094     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1095   }
1096   /* get back data */
1097   *coarse_submat_vals_n = coarse_submat_vals;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1103 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1104 {
1105   Mat            *work_mat;
1106   IS             isrow_s,iscol_s;
1107   PetscBool      rsorted,csorted;
1108   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1113   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1114   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1115   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1116 
1117   if (!rsorted) {
1118     const PetscInt *idxs;
1119     PetscInt *idxs_sorted,i;
1120 
1121     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1122     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1123     for (i=0;i<rsize;i++) {
1124       idxs_perm_r[i] = i;
1125     }
1126     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1127     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1128     for (i=0;i<rsize;i++) {
1129       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1130     }
1131     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1133   } else {
1134     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1135     isrow_s = isrow;
1136   }
1137 
1138   if (!csorted) {
1139     if (isrow == iscol) {
1140       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1141       iscol_s = isrow_s;
1142     } else {
1143       const PetscInt *idxs;
1144       PetscInt *idxs_sorted,i;
1145 
1146       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1147       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1148       for (i=0;i<csize;i++) {
1149         idxs_perm_c[i] = i;
1150       }
1151       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1152       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1153       for (i=0;i<csize;i++) {
1154         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1155       }
1156       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1157       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1158     }
1159   } else {
1160     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1161     iscol_s = iscol;
1162   }
1163 
1164   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1165 
1166   if (!rsorted || !csorted) {
1167     Mat      new_mat;
1168     IS       is_perm_r,is_perm_c;
1169 
1170     if (!rsorted) {
1171       PetscInt *idxs_r,i;
1172       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1173       for (i=0;i<rsize;i++) {
1174         idxs_r[idxs_perm_r[i]] = i;
1175       }
1176       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1177       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1178     } else {
1179       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1180     }
1181     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1182 
1183     if (!csorted) {
1184       if (isrow_s == iscol_s) {
1185         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1186         is_perm_c = is_perm_r;
1187       } else {
1188         PetscInt *idxs_c,i;
1189         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1190         for (i=0;i<csize;i++) {
1191           idxs_c[idxs_perm_c[i]] = i;
1192         }
1193         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1194         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1195       }
1196     } else {
1197       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1198     }
1199     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1200 
1201     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1202     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1203     work_mat[0] = new_mat;
1204     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1205     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1206   }
1207 
1208   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1209   *B = work_mat[0];
1210   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1211   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1212   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNCT__
1217 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1218 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1219 {
1220   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1221   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1222   Mat            new_mat;
1223   IS             is_local,is_global;
1224   PetscInt       local_size;
1225   PetscBool      isseqaij;
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1230   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1231   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1232   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1233   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1234   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1235   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1236 
1237   /* check */
1238   if (pcbddc->dbg_flag) {
1239     Vec       x,x_change;
1240     PetscReal error;
1241 
1242     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1243     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1244     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1245     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1248     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1249     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1250     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1251     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1252     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1253     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1254     ierr = VecDestroy(&x);CHKERRQ(ierr);
1255     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1256   }
1257 
1258   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1259   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1260   if (isseqaij) {
1261     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1262   } else {
1263     Mat work_mat;
1264     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1265     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1266     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1267   }
1268   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1269 #if !defined(PETSC_USE_COMPLEX)
1270   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1271 #endif
1272   /*
1273   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1274   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1275   */
1276   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1282 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1283 {
1284   PC_IS*         pcis = (PC_IS*)(pc->data);
1285   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1286   IS             is_aux1,is_aux2;
1287   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1288   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1289   PetscInt       vbs,bs;
1290   PetscBT        bitmask;
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   /*
1295     No need to setup local scatters if
1296       - primal space is unchanged
1297         AND
1298       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1299         AND
1300       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1301   */
1302   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1303     PetscFunctionReturn(0);
1304   }
1305   /* destroy old objects */
1306   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1307   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1308   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1309   /* Set Non-overlapping dimensions */
1310   n_B = pcis->n_B;
1311   n_D = pcis->n - n_B;
1312   n_vertices = pcbddc->n_vertices;
1313   /* create auxiliary bitmask */
1314   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1315   for (i=0;i<n_vertices;i++) {
1316     ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1317   }
1318 
1319   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1320   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1321   for (i=0, n_R=0; i<pcis->n; i++) {
1322     if (!PetscBTLookup(bitmask,i)) {
1323       idx_R_local[n_R] = i;
1324       n_R++;
1325     }
1326   }
1327 
1328   /* Block code */
1329   vbs = 1;
1330   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1331   if (bs>1 && !(n_vertices%bs)) {
1332     PetscBool is_blocked = PETSC_TRUE;
1333     PetscInt  *vary;
1334     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1335     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1336     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1337     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1338     for (i=0; i<n_vertices/bs; i++) {
1339       if (vary[i]!=0 && vary[i]!=bs) {
1340         is_blocked = PETSC_FALSE;
1341         break;
1342       }
1343     }
1344     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1345       vbs = bs;
1346       for (i=0;i<n_R/vbs;i++) {
1347         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1348       }
1349     }
1350     ierr = PetscFree(vary);CHKERRQ(ierr);
1351   }
1352   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1353   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1354 
1355   /* print some info if requested */
1356   if (pcbddc->dbg_flag) {
1357     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1358     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1359     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1361     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1362     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1363     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1364   }
1365 
1366   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1367   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1368   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1369   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1370   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1371   for (i=0; i<n_D; i++) {
1372     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1373   }
1374   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1375   for (i=0, j=0; i<n_R; i++) {
1376     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1377       aux_array1[j++] = i;
1378     }
1379   }
1380   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1381   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1382   for (i=0, j=0; i<n_B; i++) {
1383     if (!PetscBTLookup(bitmask,is_indices[i])) {
1384       aux_array2[j++] = i;
1385     }
1386   }
1387   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1388   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1389   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1390   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1391   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1392 
1393   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1394     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1395     for (i=0, j=0; i<n_R; i++) {
1396       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1397         aux_array1[j++] = i;
1398       }
1399     }
1400     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1401     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1402     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1403   }
1404   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1405   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1412 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1413 {
1414   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1415   PC_IS          *pcis = (PC_IS*)pc->data;
1416   PC             pc_temp;
1417   Mat            A_RR;
1418   MatReuse       reuse;
1419   PetscScalar    m_one = -1.0;
1420   PetscReal      value;
1421   PetscInt       n_D,n_R,ibs,mbs;
1422   PetscBool      use_exact,use_exact_reduced,issbaij;
1423   PetscErrorCode ierr;
1424   /* prefixes stuff */
1425   char           dir_prefix[256],neu_prefix[256],str_level[16];
1426   size_t         len;
1427 
1428   PetscFunctionBegin;
1429 
1430   /* compute prefixes */
1431   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1432   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1433   if (!pcbddc->current_level) {
1434     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1435     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1436     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1437     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1438   } else {
1439     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1440     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1441     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1442     len -= 15; /* remove "pc_bddc_coarse_" */
1443     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1444     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1445     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1446     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1447     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1448     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1449     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1450     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1451   }
1452 
1453   /* DIRICHLET PROBLEM */
1454   if (dirichlet) {
1455     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1456     if (pcbddc->issym) {
1457       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1458     }
1459     /* Matrix for Dirichlet problem is pcis->A_II */
1460     n_D = pcis->n - pcis->n_B;
1461     if (!pcbddc->ksp_D) { /* create object if not yet build */
1462       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1463       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1464       /* default */
1465       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1466       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1467       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1468       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1469       if (issbaij) {
1470         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1471       } else {
1472         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1473       }
1474       /* Allow user's customization */
1475       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1476       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1477     }
1478     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1479     if (sub_schurs->interior_solver) {
1480       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1481     }
1482     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1483     if (!n_D) {
1484       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1485       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1486     }
1487     /* Set Up KSP for Dirichlet problem of BDDC */
1488     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1489     /* set ksp_D into pcis data */
1490     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1491     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1492     pcis->ksp_D = pcbddc->ksp_D;
1493   }
1494 
1495   /* NEUMANN PROBLEM */
1496   A_RR = 0;
1497   if (neumann) {
1498     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1499     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1500     if (pcbddc->ksp_R) { /* already created ksp */
1501       PetscInt nn_R;
1502       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1503       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1504       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1505       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1506         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1507         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1508         reuse = MAT_INITIAL_MATRIX;
1509       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1510         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1511           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1512           reuse = MAT_INITIAL_MATRIX;
1513         } else { /* safe to reuse the matrix */
1514           reuse = MAT_REUSE_MATRIX;
1515         }
1516       }
1517       /* last check */
1518       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1519         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1520         reuse = MAT_INITIAL_MATRIX;
1521       }
1522     } else { /* first time, so we need to create the matrix */
1523       reuse = MAT_INITIAL_MATRIX;
1524     }
1525     /* extract A_RR */
1526     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1527     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1528     if (ibs != mbs) {
1529       Mat newmat;
1530       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1531       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1532       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1533     } else {
1534       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1535     }
1536     if (pcbddc->issym) {
1537       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1538     }
1539     if (!pcbddc->ksp_R) { /* create object if not present */
1540       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1541       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1542       /* default */
1543       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1544       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1545       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1546       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1547       if (issbaij) {
1548         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1549       } else {
1550         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1551       }
1552       /* Allow user's customization */
1553       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1554       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1555     }
1556     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1557     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1558     if (!n_R) {
1559       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1560       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1561     }
1562     /* Set Up KSP for Neumann problem of BDDC */
1563     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1564   }
1565 
1566   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1567   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1568     if (pcbddc->dbg_flag) {
1569       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1570       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1571       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1572     }
1573     if (dirichlet) { /* Dirichlet */
1574       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1575       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1576       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1577       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1578       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1579       /* need to be adapted? */
1580       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1581       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1582       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1583       /* print info */
1584       if (pcbddc->dbg_flag) {
1585         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1586         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1587       }
1588       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1589         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1590       }
1591     }
1592     if (neumann) { /* Neumann */
1593       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1594       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1595       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1596       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1597       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1598       /* need to be adapted? */
1599       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1600       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1601       /* print info */
1602       if (pcbddc->dbg_flag) {
1603         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1604         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1605       }
1606       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1607         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1608       }
1609     }
1610   }
1611   /* free Neumann problem's matrix */
1612   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #undef __FUNCT__
1617 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1618 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1619 {
1620   PetscErrorCode ierr;
1621   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1622 
1623   PetscFunctionBegin;
1624   ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1625   if (!pcbddc->switch_static) {
1626     if (applytranspose && pcbddc->local_auxmat1) {
1627       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1628       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1629     }
1630     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1631     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1632   } else {
1633     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1634     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1635     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1636     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1637     if (applytranspose && pcbddc->local_auxmat1) {
1638       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1639       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1640       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1642     }
1643   }
1644   if (applytranspose) {
1645     ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1646   } else {
1647     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1648   }
1649   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1650   if (!pcbddc->switch_static) {
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     if (!applytranspose && pcbddc->local_auxmat1) {
1654       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1655       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1656     }
1657   } else {
1658     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     if (!applytranspose && pcbddc->local_auxmat1) {
1663       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1664       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1665     }
1666     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1668     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1669     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1675 #undef __FUNCT__
1676 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1677 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1678 {
1679   PetscErrorCode ierr;
1680   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1681   PC_IS*            pcis = (PC_IS*)  (pc->data);
1682   const PetscScalar zero = 0.0;
1683 
1684   PetscFunctionBegin;
1685   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1686   if (applytranspose) {
1687     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1688     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1689   } else {
1690     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1691     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1692   }
1693   /* start communications from local primal nodes to rhs of coarse solver */
1694   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1695   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1696   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1697 
1698   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1699   /* TODO remove null space when doing multilevel */
1700   if (pcbddc->coarse_ksp) {
1701     Vec rhs,sol;
1702 
1703     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1704     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1705     if (applytranspose) {
1706       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1707     } else {
1708       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1709     }
1710   }
1711 
1712   /* Local solution on R nodes */
1713   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1714     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1715   }
1716 
1717   /* communications from coarse sol to local primal nodes */
1718   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1719   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1720 
1721   /* Sum contributions from two levels */
1722   if (applytranspose) {
1723     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1724     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1725   } else {
1726     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1727     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1734 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1735 {
1736   PetscErrorCode ierr;
1737   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1738   PetscScalar    *array;
1739   Vec            from,to;
1740 
1741   PetscFunctionBegin;
1742   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1743     from = pcbddc->coarse_vec;
1744     to = pcbddc->vec1_P;
1745     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1746       Vec tvec;
1747 
1748       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1749       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1750       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1751       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1752       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1753       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1754     }
1755   } else { /* from local to global -> put data in coarse right hand side */
1756     from = pcbddc->vec1_P;
1757     to = pcbddc->coarse_vec;
1758   }
1759   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1765 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1766 {
1767   PetscErrorCode ierr;
1768   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1769   PetscScalar    *array;
1770   Vec            from,to;
1771 
1772   PetscFunctionBegin;
1773   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1774     from = pcbddc->coarse_vec;
1775     to = pcbddc->vec1_P;
1776   } else { /* from local to global -> put data in coarse right hand side */
1777     from = pcbddc->vec1_P;
1778     to = pcbddc->coarse_vec;
1779   }
1780   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1781   if (smode == SCATTER_FORWARD) {
1782     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1783       Vec tvec;
1784 
1785       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1786       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1787       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1788       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1789     }
1790   } else {
1791     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1792      ierr = VecResetArray(from);CHKERRQ(ierr);
1793     }
1794   }
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 /* uncomment for testing purposes */
1799 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1800 #undef __FUNCT__
1801 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1802 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1803 {
1804   PetscErrorCode    ierr;
1805   PC_IS*            pcis = (PC_IS*)(pc->data);
1806   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1807   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1808   /* one and zero */
1809   PetscScalar       one=1.0,zero=0.0;
1810   /* space to store constraints and their local indices */
1811   PetscScalar       *constraints_data;
1812   PetscInt          *constraints_idxs,*constraints_idxs_B;
1813   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1814   PetscInt          *constraints_n;
1815   /* iterators */
1816   PetscInt          i,j,k,total_counts,total_counts_cc;
1817   /* BLAS integers */
1818   PetscBLASInt      lwork,lierr;
1819   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1820   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1821   /* reuse */
1822   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1823   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1824   /* change of basis */
1825   PetscBool         boolforchange,qr_needed;
1826   PetscBT           change_basis,qr_needed_idx;
1827   /* auxiliary stuff */
1828   PetscInt          *nnz,*is_indices;
1829   PetscInt          ncc;
1830   /* some quantities */
1831   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1832   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1833 
1834   PetscFunctionBegin;
1835   /* Destroy Mat objects computed previously */
1836   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1837   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1838   /* save info on constraints from previous setup (if any) */
1839   olocal_primal_size = pcbddc->local_primal_size;
1840   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1841   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1842   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1843   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1844   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1845   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1846 
1847   /* print some info */
1848   if (pcbddc->dbg_flag) {
1849     IS       vertices;
1850     PetscInt nv,nedges,nfaces;
1851     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1852     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1853     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1854     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1855     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1856     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1857     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1858     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1859     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1860   }
1861 
1862   if (!pcbddc->adaptive_selection) {
1863     IS           ISForVertices,*ISForFaces,*ISForEdges;
1864     MatNullSpace nearnullsp;
1865     const Vec    *nearnullvecs;
1866     Vec          *localnearnullsp;
1867     PetscScalar  *array;
1868     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1869     PetscBool    nnsp_has_cnst;
1870     /* LAPACK working arrays for SVD or POD */
1871     PetscBool    skip_lapack;
1872     PetscScalar  *work;
1873     PetscReal    *singular_vals;
1874 #if defined(PETSC_USE_COMPLEX)
1875     PetscReal    *rwork;
1876 #endif
1877 #if defined(PETSC_MISSING_LAPACK_GESVD)
1878     PetscScalar  *temp_basis,*correlation_mat;
1879 #else
1880     PetscBLASInt dummy_int=1;
1881     PetscScalar  dummy_scalar=1.;
1882 #endif
1883 
1884     /* Get index sets for faces, edges and vertices from graph */
1885     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1886     /* free unneeded index sets */
1887     if (!pcbddc->use_vertices) {
1888       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1889     }
1890     if (!pcbddc->use_edges) {
1891       for (i=0;i<n_ISForEdges;i++) {
1892         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1893       }
1894       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1895       n_ISForEdges = 0;
1896     }
1897     if (!pcbddc->use_faces) {
1898       for (i=0;i<n_ISForFaces;i++) {
1899         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1900       }
1901       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1902       n_ISForFaces = 0;
1903     }
1904 
1905 #if defined(PETSC_USE_DEBUG)
1906     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1907        Also use_change_of_basis should be consistent among processors */
1908     if (pcbddc->NullSpace) {
1909       PetscBool tbool[2],gbool[2];
1910 
1911       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1912         pcbddc->use_change_of_basis = PETSC_TRUE;
1913         if (!ISForEdges) {
1914           pcbddc->use_change_on_faces = PETSC_TRUE;
1915         }
1916       }
1917       tbool[0] = pcbddc->use_change_of_basis;
1918       tbool[1] = pcbddc->use_change_on_faces;
1919       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1920       pcbddc->use_change_of_basis = gbool[0];
1921       pcbddc->use_change_on_faces = gbool[1];
1922     }
1923 #endif
1924 
1925     /* check if near null space is attached to global mat */
1926     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1927     if (nearnullsp) {
1928       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1929       /* remove any stored info */
1930       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1931       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1932       /* store information for BDDC solver reuse */
1933       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1934       pcbddc->onearnullspace = nearnullsp;
1935       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1936       for (i=0;i<nnsp_size;i++) {
1937         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1938       }
1939     } else { /* if near null space is not provided BDDC uses constants by default */
1940       nnsp_size = 0;
1941       nnsp_has_cnst = PETSC_TRUE;
1942     }
1943     /* get max number of constraints on a single cc */
1944     max_constraints = nnsp_size;
1945     if (nnsp_has_cnst) max_constraints++;
1946 
1947     /*
1948          Evaluate maximum storage size needed by the procedure
1949          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
1950          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
1951          There can be multiple constraints per connected component
1952                                                                                                                                                            */
1953     n_vertices = 0;
1954     if (ISForVertices) {
1955       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1956     }
1957     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
1958     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
1959 
1960     total_counts = n_ISForFaces+n_ISForEdges;
1961     total_counts *= max_constraints;
1962     total_counts += n_vertices;
1963     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1964 
1965     total_counts = 0;
1966     max_size_of_constraint = 0;
1967     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1968       IS used_is;
1969       if (i<n_ISForEdges) {
1970         used_is = ISForEdges[i];
1971       } else {
1972         used_is = ISForFaces[i-n_ISForEdges];
1973       }
1974       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
1975       total_counts += j;
1976       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1977     }
1978     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
1979 
1980     /* get local part of global near null space vectors */
1981     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1982     for (k=0;k<nnsp_size;k++) {
1983       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1984       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1985       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986     }
1987 
1988     /* whether or not to skip lapack calls */
1989     skip_lapack = PETSC_TRUE;
1990     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1991 
1992     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1993     if (!skip_lapack) {
1994       PetscScalar temp_work;
1995 
1996 #if defined(PETSC_MISSING_LAPACK_GESVD)
1997       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1998       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1999       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2000       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2001 #if defined(PETSC_USE_COMPLEX)
2002       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2003 #endif
2004       /* now we evaluate the optimal workspace using query with lwork=-1 */
2005       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2006       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2007       lwork = -1;
2008       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2009 #if !defined(PETSC_USE_COMPLEX)
2010       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2011 #else
2012       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2013 #endif
2014       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2015       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2016 #else /* on missing GESVD */
2017       /* SVD */
2018       PetscInt max_n,min_n;
2019       max_n = max_size_of_constraint;
2020       min_n = max_constraints;
2021       if (max_size_of_constraint < max_constraints) {
2022         min_n = max_size_of_constraint;
2023         max_n = max_constraints;
2024       }
2025       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2026 #if defined(PETSC_USE_COMPLEX)
2027       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2028 #endif
2029       /* now we evaluate the optimal workspace using query with lwork=-1 */
2030       lwork = -1;
2031       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2032       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2033       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2034       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2035 #if !defined(PETSC_USE_COMPLEX)
2036       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2037 #else
2038       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2039 #endif
2040       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2041       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2042 #endif /* on missing GESVD */
2043       /* Allocate optimal workspace */
2044       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2045       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2046     }
2047     /* Now we can loop on constraining sets */
2048     total_counts = 0;
2049     constraints_idxs_ptr[0] = 0;
2050     constraints_data_ptr[0] = 0;
2051     /* vertices */
2052     if (n_vertices) {
2053       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2054       if (nnsp_has_cnst) { /* it considers all possible vertices */
2055         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2056         for (i=0;i<n_vertices;i++) {
2057           constraints_n[total_counts] = 1;
2058           constraints_data[total_counts] = 1.0;
2059           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2060           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2061           total_counts++;
2062         }
2063       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2064         PetscBool used_vertex;
2065         for (i=0;i<n_vertices;i++) {
2066           used_vertex = PETSC_FALSE;
2067           k = 0;
2068           while (!used_vertex && k<nnsp_size) {
2069             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2070             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2071               constraints_n[total_counts] = 1;
2072               constraints_idxs[total_counts] = is_indices[i];
2073               constraints_data[total_counts] = 1.0;
2074               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2075               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2076               total_counts++;
2077               used_vertex = PETSC_TRUE;
2078             }
2079             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2080             k++;
2081           }
2082         }
2083       }
2084       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2085       n_vertices = total_counts;
2086     }
2087 
2088     /* edges and faces */
2089     total_counts_cc = total_counts;
2090     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2091       IS        used_is;
2092       PetscBool idxs_copied = PETSC_FALSE;
2093 
2094       if (ncc<n_ISForEdges) {
2095         used_is = ISForEdges[ncc];
2096         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2097       } else {
2098         used_is = ISForFaces[ncc-n_ISForEdges];
2099         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2100       }
2101       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2102 
2103       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2104       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2105       /* change of basis should not be performed on local periodic nodes */
2106       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2107       if (nnsp_has_cnst) {
2108         PetscScalar quad_value;
2109 
2110         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2111         idxs_copied = PETSC_TRUE;
2112 
2113         if (!pcbddc->use_nnsp_true) {
2114           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2115         } else {
2116           quad_value = 1.0;
2117         }
2118         for (j=0;j<size_of_constraint;j++) {
2119           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2120         }
2121         temp_constraints++;
2122         total_counts++;
2123       }
2124       for (k=0;k<nnsp_size;k++) {
2125         PetscReal real_value;
2126         PetscScalar *ptr_to_data;
2127 
2128         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2129         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2130         for (j=0;j<size_of_constraint;j++) {
2131           ptr_to_data[j] = array[is_indices[j]];
2132         }
2133         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2134         /* check if array is null on the connected component */
2135         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2136         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2137         if (real_value > 0.0) { /* keep indices and values */
2138           temp_constraints++;
2139           total_counts++;
2140           if (!idxs_copied) {
2141             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2142             idxs_copied = PETSC_TRUE;
2143           }
2144         }
2145       }
2146       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2147       valid_constraints = temp_constraints;
2148       if (!pcbddc->use_nnsp_true && temp_constraints) {
2149         if (temp_constraints == 1) { /* just normalize the constraint */
2150           PetscScalar norm,*ptr_to_data;
2151 
2152           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2153           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2154           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2155           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2156           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2157         } else { /* perform SVD */
2158           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2159           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2160 
2161 #if defined(PETSC_MISSING_LAPACK_GESVD)
2162           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2163              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2164              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2165                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2166                 from that computed using LAPACKgesvd
2167              -> This is due to a different computation of eigenvectors in LAPACKheev
2168              -> The quality of the POD-computed basis will be the same */
2169           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2170           /* Store upper triangular part of correlation matrix */
2171           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2172           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2173           for (j=0;j<temp_constraints;j++) {
2174             for (k=0;k<j+1;k++) {
2175               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2176             }
2177           }
2178           /* compute eigenvalues and eigenvectors of correlation matrix */
2179           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2180           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2181 #if !defined(PETSC_USE_COMPLEX)
2182           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2183 #else
2184           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2185 #endif
2186           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2187           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2188           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2189           j = 0;
2190           while (j < temp_constraints && singular_vals[j] < tol) j++;
2191           total_counts = total_counts-j;
2192           valid_constraints = temp_constraints-j;
2193           /* scale and copy POD basis into used quadrature memory */
2194           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2195           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2196           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2197           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2198           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2199           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2200           if (j<temp_constraints) {
2201             PetscInt ii;
2202             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2203             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2204             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2205             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2206             for (k=0;k<temp_constraints-j;k++) {
2207               for (ii=0;ii<size_of_constraint;ii++) {
2208                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2209               }
2210             }
2211           }
2212 #else  /* on missing GESVD */
2213           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2214           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2215           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2216           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2217 #if !defined(PETSC_USE_COMPLEX)
2218           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2219 #else
2220           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2221 #endif
2222           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2223           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2224           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2225           k = temp_constraints;
2226           if (k > size_of_constraint) k = size_of_constraint;
2227           j = 0;
2228           while (j < k && singular_vals[k-j-1] < tol) j++;
2229           valid_constraints = k-j;
2230           total_counts = total_counts-temp_constraints+valid_constraints;
2231 #endif /* on missing GESVD */
2232         }
2233       }
2234       /* update pointers information */
2235       if (valid_constraints) {
2236         constraints_n[total_counts_cc] = valid_constraints;
2237         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2238         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2239         total_counts_cc++;
2240         /* set change_of_basis flag */
2241         if (boolforchange) {
2242           for (j=valid_constraints;j>0;j--) {
2243             PetscBTSet(change_basis,total_counts-j);
2244           }
2245         }
2246       }
2247     }
2248     /* free workspace */
2249     if (!skip_lapack) {
2250       ierr = PetscFree(work);CHKERRQ(ierr);
2251 #if defined(PETSC_USE_COMPLEX)
2252       ierr = PetscFree(rwork);CHKERRQ(ierr);
2253 #endif
2254       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2255 #if defined(PETSC_MISSING_LAPACK_GESVD)
2256       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2257       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2258 #endif
2259     }
2260     for (k=0;k<nnsp_size;k++) {
2261       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2262     }
2263     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2264     /* free index sets of faces, edges and vertices */
2265     for (i=0;i<n_ISForFaces;i++) {
2266       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2267     }
2268     if (n_ISForFaces) {
2269       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2270     }
2271     for (i=0;i<n_ISForEdges;i++) {
2272       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2273     }
2274     if (n_ISForEdges) {
2275       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2276     }
2277     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2278   } else {
2279     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2280 
2281     total_counts = 0;
2282     n_vertices = 0;
2283     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2284       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2285     }
2286     max_constraints = 0;
2287     total_counts_cc = 0;
2288     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2289       total_counts += pcbddc->adaptive_constraints_n[i];
2290       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2291       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2292     }
2293     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2294     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2295     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2296     constraints_data = pcbddc->adaptive_constraints_data;
2297     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2298     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2299     total_counts_cc = 0;
2300     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2301       if (pcbddc->adaptive_constraints_n[i]) {
2302         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2303       }
2304     }
2305 #if 0
2306     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2307     for (i=0;i<total_counts_cc;i++) {
2308       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2309       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2310       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2311         printf(" %d",constraints_idxs[j]);
2312       }
2313       printf("\n");
2314       printf("number of cc: %d\n",constraints_n[i]);
2315     }
2316     for (i=0;i<n_vertices;i++) {
2317       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2318     }
2319     for (i=0;i<sub_schurs->n_subs;i++) {
2320       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2321     }
2322 #endif
2323 
2324     max_size_of_constraint = 0;
2325     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2326     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2327     /* Change of basis */
2328     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2329     if (pcbddc->use_change_of_basis) {
2330       PetscInt cum = n_vertices;
2331       for (i=0;i<sub_schurs->n_subs;i++) {
2332         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2333           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2334             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2335           }
2336         }
2337         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2338       }
2339     }
2340   }
2341   pcbddc->local_primal_size = total_counts;
2342   pcbddc->local_primal_size_cc = total_counts_cc;
2343   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2344   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2345 
2346   /* map constraints_idxs in boundary numbering */
2347   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2348   if (i != constraints_idxs_ptr[total_counts_cc]) {
2349     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2350   }
2351 
2352   /* Create constraint matrix */
2353   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2354   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2355   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2356 
2357   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2358   /* determine if a QR strategy is needed for change of basis */
2359   qr_needed = PETSC_FALSE;
2360   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2361   total_primal_vertices=0;
2362   for (i=0;i<total_counts_cc;i++) {
2363     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2364     if (size_of_constraint == 1) {
2365       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2366     } else if (PetscBTLookup(change_basis,i)) {
2367       for (k=0;k<constraints_n[i];k++) {
2368         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2369       }
2370       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2371         PetscBTSet(qr_needed_idx,i);
2372         qr_needed = PETSC_TRUE;
2373       }
2374     }
2375   }
2376   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2377   pcbddc->n_vertices = total_primal_vertices;
2378   /* permute indices in order to have a sorted set of vertices */
2379   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2380   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2381   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2382 
2383   /* nonzero structure of constraint matrix */
2384   /* and get reference dof for local constraints */
2385   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2386   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2387 
2388   j = total_primal_vertices;
2389   total_counts = total_primal_vertices;
2390   for (i=n_vertices;i<total_counts_cc;i++) {
2391     if (!PetscBTLookup(change_basis,i)) {
2392       pcbddc->local_primal_ref_node[i] = constraints_idxs[constraints_idxs_ptr[i]];
2393       pcbddc->local_primal_ref_mult[i] = constraints_n[i];
2394       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2395       for (k=0;k<constraints_n[i];k++) {
2396         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2397         nnz[j+k] = size_of_constraint;
2398       }
2399       j += constraints_n[i];
2400     }
2401   }
2402   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2403   ierr = PetscFree(nnz);CHKERRQ(ierr);
2404 
2405   /* set values in constraint matrix */
2406   for (i=0;i<total_primal_vertices;i++) {
2407     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2408   }
2409   total_counts = total_primal_vertices;
2410   for (i=n_vertices;i<total_counts_cc;i++) {
2411     if (!PetscBTLookup(change_basis,i)) {
2412       PetscInt *cols;
2413 
2414       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2415       cols = constraints_idxs+constraints_idxs_ptr[i];
2416       for (k=0;k<constraints_n[i];k++) {
2417         PetscInt    row = total_counts+k;
2418         PetscScalar *vals;
2419 
2420         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2421         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2422       }
2423       total_counts += constraints_n[i];
2424     }
2425   }
2426   /* assembling */
2427   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2428   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2429 
2430   /*
2431   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2432   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2433   */
2434   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2435   if (pcbddc->use_change_of_basis) {
2436     /* dual and primal dofs on a single cc */
2437     PetscInt     dual_dofs,primal_dofs;
2438     /* working stuff for GEQRF */
2439     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2440     PetscBLASInt lqr_work;
2441     /* working stuff for UNGQR */
2442     PetscScalar  *gqr_work,lgqr_work_t;
2443     PetscBLASInt lgqr_work;
2444     /* working stuff for TRTRS */
2445     PetscScalar  *trs_rhs;
2446     PetscBLASInt Blas_NRHS;
2447     /* pointers for values insertion into change of basis matrix */
2448     PetscInt     *start_rows,*start_cols;
2449     PetscScalar  *start_vals;
2450     /* working stuff for values insertion */
2451     PetscBT      is_primal;
2452     PetscInt     *aux_primal_numbering_B;
2453     /* matrix sizes */
2454     PetscInt     global_size,local_size;
2455     /* temporary change of basis */
2456     Mat          localChangeOfBasisMatrix;
2457     /* extra space for debugging */
2458     PetscScalar  *dbg_work;
2459 
2460     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2461     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2462     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2463     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2464     /* nonzeros for local mat */
2465     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2466     for (i=0;i<pcis->n;i++) nnz[i]=1;
2467     for (i=n_vertices;i<total_counts_cc;i++) {
2468       if (PetscBTLookup(change_basis,i)) {
2469         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2470         if (PetscBTLookup(qr_needed_idx,i)) {
2471           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2472         } else {
2473           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2474           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2475         }
2476       }
2477     }
2478     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2479     ierr = PetscFree(nnz);CHKERRQ(ierr);
2480     /* Set initial identity in the matrix */
2481     for (i=0;i<pcis->n;i++) {
2482       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2483     }
2484 
2485     if (pcbddc->dbg_flag) {
2486       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2487       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2488     }
2489 
2490 
2491     /* Now we loop on the constraints which need a change of basis */
2492     /*
2493        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2494        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2495 
2496        Basic blocks of change of basis matrix T computed by
2497 
2498           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2499 
2500             | 1        0   ...        0         s_1/S |
2501             | 0        1   ...        0         s_2/S |
2502             |              ...                        |
2503             | 0        ...            1     s_{n-1}/S |
2504             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2505 
2506             with S = \sum_{i=1}^n s_i^2
2507             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2508                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2509 
2510           - QR decomposition of constraints otherwise
2511     */
2512     if (qr_needed) {
2513       /* space to store Q */
2514       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2515       /* first we issue queries for optimal work */
2516       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2517       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2518       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2519       lqr_work = -1;
2520       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2521       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2522       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2523       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2524       lgqr_work = -1;
2525       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2526       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2527       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2528       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2529       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2530       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2531       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2532       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2533       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2534       /* array to store scaling factors for reflectors */
2535       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2536       /* array to store rhs and solution of triangular solver */
2537       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2538       /* allocating workspace for check */
2539       if (pcbddc->dbg_flag) {
2540         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2541       }
2542     }
2543     /* array to store whether a node is primal or not */
2544     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2545     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2546     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2547     if (i != total_primal_vertices) {
2548       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2549     }
2550     for (i=0;i<total_primal_vertices;i++) {
2551       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2552     }
2553     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2554 
2555     /* loop on constraints and see whether or not they need a change of basis and compute it */
2556     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2557       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2558       if (PetscBTLookup(change_basis,total_counts)) {
2559         /* get constraint info */
2560         primal_dofs = constraints_n[total_counts];
2561         dual_dofs = size_of_constraint-primal_dofs;
2562 
2563         if (pcbddc->dbg_flag) {
2564           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2565         }
2566 
2567         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2568 
2569           /* copy quadrature constraints for change of basis check */
2570           if (pcbddc->dbg_flag) {
2571             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2572           }
2573           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2574           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2575 
2576           /* compute QR decomposition of constraints */
2577           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2578           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2579           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2580           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2581           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2582           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2583           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2584 
2585           /* explictly compute R^-T */
2586           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2587           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2588           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2589           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2590           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2591           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2592           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2593           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2594           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2595           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2596 
2597           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2598           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2600           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2601           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2602           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2603           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2604           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2605           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2606 
2607           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2608              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2609              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2610           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2611           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2612           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2613           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2614           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2615           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2616           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2617           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2618           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2619           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2620 
2621           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2622           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2623           /* insert cols for primal dofs */
2624           for (j=0;j<primal_dofs;j++) {
2625             start_vals = &qr_basis[j*size_of_constraint];
2626             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2627             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2628           }
2629           /* insert cols for dual dofs */
2630           for (j=0,k=0;j<dual_dofs;k++) {
2631             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2632               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2633               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2634               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2635               j++;
2636             }
2637           }
2638 
2639           /* check change of basis */
2640           if (pcbddc->dbg_flag) {
2641             PetscInt   ii,jj;
2642             PetscBool valid_qr=PETSC_TRUE;
2643             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2644             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2645             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2646             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2647             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2648             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2649             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2650             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2651             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2652             for (jj=0;jj<size_of_constraint;jj++) {
2653               for (ii=0;ii<primal_dofs;ii++) {
2654                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2655                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2656               }
2657             }
2658             if (!valid_qr) {
2659               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2660               for (jj=0;jj<size_of_constraint;jj++) {
2661                 for (ii=0;ii<primal_dofs;ii++) {
2662                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2663                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2664                   }
2665                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2666                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2667                   }
2668                 }
2669               }
2670             } else {
2671               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2672             }
2673           }
2674         } else { /* simple transformation block */
2675           PetscInt    row,col;
2676           PetscScalar val,norm;
2677 
2678           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2679           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2680           for (j=0;j<size_of_constraint;j++) {
2681             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2682             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2683             if (!PetscBTLookup(is_primal,row_B)) {
2684               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2685               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2686               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2687             } else {
2688               for (k=0;k<size_of_constraint;k++) {
2689                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2690                 if (row != col) {
2691                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2692                 } else {
2693                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2694                 }
2695                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2696               }
2697             }
2698           }
2699           if (pcbddc->dbg_flag) {
2700             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2701           }
2702         }
2703       } else {
2704         if (pcbddc->dbg_flag) {
2705           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2706         }
2707       }
2708     }
2709 
2710     /* free workspace */
2711     if (qr_needed) {
2712       if (pcbddc->dbg_flag) {
2713         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2714       }
2715       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2716       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2717       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2718       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2719       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2720     }
2721     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2722     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2723     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724 
2725     /* assembling of global change of variable */
2726     {
2727       Mat      tmat;
2728       PetscInt bs;
2729 
2730       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2731       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2732       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2733       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2734       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2735       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2736       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2737       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2738       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2739       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2740       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2741       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2742       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2743       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2744       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2745       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2746       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2747       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2748     }
2749     /* check */
2750     if (pcbddc->dbg_flag) {
2751       PetscReal error;
2752       Vec       x,x_change;
2753 
2754       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2755       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2756       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2757       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2758       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2759       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2761       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2762       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2763       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2764       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2765       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2766       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2767       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2768       ierr = VecDestroy(&x);CHKERRQ(ierr);
2769       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2770     }
2771 
2772     /* adapt sub_schurs computed (if any) */
2773     if (pcbddc->use_deluxe_scaling) {
2774       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2775       if (sub_schurs->S_Ej_all) {
2776         Mat S_new,tmat;
2777         IS is_all_N;
2778 
2779         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2780         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2781         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2782         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2783         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2784         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2785         sub_schurs->S_Ej_all = S_new;
2786         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2787         if (sub_schurs->sum_S_Ej_all) {
2788           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2789           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2790           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2791           sub_schurs->sum_S_Ej_all = S_new;
2792           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2793         }
2794         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2795       }
2796     }
2797     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2798   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2799     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2800     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2801   }
2802 
2803   /* set up change of basis context */
2804   if (pcbddc->ChangeOfBasisMatrix) {
2805     PCBDDCChange_ctx change_ctx;
2806 
2807     if (!pcbddc->new_global_mat) {
2808       PetscInt global_size,local_size;
2809 
2810       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2811       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2812       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2813       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2814       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2815       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2816       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2817       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2818       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2819     } else {
2820       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2821       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2822       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2823     }
2824     if (!pcbddc->user_ChangeOfBasisMatrix) {
2825       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2826       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2827     } else {
2828       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2829       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2830     }
2831     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2832     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2833     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2834     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2835   }
2836 
2837   /* check if a new primal space has been introduced */
2838   pcbddc->new_primal_space_local = PETSC_TRUE;
2839   if (olocal_primal_size == pcbddc->local_primal_size) {
2840     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2841     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2842     if (!pcbddc->new_primal_space_local) {
2843       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2844       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2845     }
2846   }
2847   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2848   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2849   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2850 
2851   /* flush dbg viewer */
2852   if (pcbddc->dbg_flag) {
2853     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2854   }
2855 
2856   /* free workspace */
2857   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2858   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2859   if (!pcbddc->adaptive_selection) {
2860     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2861     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2862   } else {
2863     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2864                       pcbddc->adaptive_constraints_idxs_ptr,
2865                       pcbddc->adaptive_constraints_data_ptr,
2866                       pcbddc->adaptive_constraints_idxs,
2867                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2868     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2869     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2870   }
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2876 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2877 {
2878   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2879   PC_IS       *pcis = (PC_IS*)pc->data;
2880   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2881   PetscInt    ierr,i,vertex_size,N;
2882   PetscViewer viewer=pcbddc->dbg_viewer;
2883 
2884   PetscFunctionBegin;
2885   /* Reset previously computed graph */
2886   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2887   /* Init local Graph struct */
2888   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2889   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2890 
2891   /* Check validity of the csr graph passed in by the user */
2892   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2893     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2894   }
2895 
2896   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2897   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2898     PetscInt  *xadj,*adjncy;
2899     PetscInt  nvtxs;
2900     PetscBool flg_row=PETSC_FALSE;
2901 
2902     if (pcbddc->use_local_adj) {
2903 
2904       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2905       if (flg_row) {
2906         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2907         pcbddc->computed_rowadj = PETSC_TRUE;
2908       }
2909       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2910     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2911       IS                     is_dummy;
2912       ISLocalToGlobalMapping l2gmap_dummy;
2913       PetscInt               j,sum;
2914       PetscInt               *cxadj,*cadjncy;
2915       const PetscInt         *idxs;
2916       PCBDDCGraph            graph;
2917       PetscBT                is_on_boundary;
2918 
2919       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2920       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2921       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2922       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2923       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2924       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2925       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2926       if (flg_row) {
2927         graph->xadj = xadj;
2928         graph->adjncy = adjncy;
2929       }
2930       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2931       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2932       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2933 
2934       if (pcbddc->dbg_flag) {
2935         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2936         for (i=0;i<graph->ncc;i++) {
2937           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2938         }
2939       }
2940 
2941       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2942       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2943       for (i=0;i<pcis->n_B;i++) {
2944         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2945       }
2946       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2947 
2948       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2949       sum = 0;
2950       for (i=0;i<graph->ncc;i++) {
2951         PetscInt sizecc = 0;
2952         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2953           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2954             sizecc++;
2955           }
2956         }
2957         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2958           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2959             cxadj[graph->queue[j]] = sizecc;
2960           }
2961         }
2962         sum += sizecc*sizecc;
2963       }
2964       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2965       sum = 0;
2966       for (i=0;i<pcis->n;i++) {
2967         PetscInt temp = cxadj[i];
2968         cxadj[i] = sum;
2969         sum += temp;
2970       }
2971       cxadj[pcis->n] = sum;
2972       for (i=0;i<graph->ncc;i++) {
2973         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2974           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2975             PetscInt k,sizecc = 0;
2976             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2977               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2978                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2979                 sizecc++;
2980               }
2981             }
2982           }
2983         }
2984       }
2985       if (sum) {
2986         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2987       } else {
2988         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2989         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2990       }
2991       graph->xadj = 0;
2992       graph->adjncy = 0;
2993       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2994       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2995     }
2996   }
2997   if (pcbddc->dbg_flag) {
2998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2999   }
3000 
3001   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3002   vertex_size = 1;
3003   if (pcbddc->user_provided_isfordofs) {
3004     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3005       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3006       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3007         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3008         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3009       }
3010       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3011       pcbddc->n_ISForDofs = 0;
3012       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3013     }
3014     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3015     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3016   } else {
3017     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3018       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3019       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3020       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3021         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3022       }
3023     }
3024   }
3025 
3026   /* Setup of Graph */
3027   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3028     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3029   }
3030   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3031     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3032   }
3033   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3034 
3035   /* Graph's connected components analysis */
3036   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3037 
3038   /* print some info to stdout */
3039   if (pcbddc->dbg_flag) {
3040     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3041   }
3042 
3043   /* mark topography has done */
3044   pcbddc->recompute_topography = PETSC_FALSE;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "PCBDDCSubsetNumbering"
3050 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3051 {
3052   Vec            local_vec,global_vec;
3053   IS             seqis,paris;
3054   VecScatter     scatter_ctx;
3055   PetscScalar    *array;
3056   PetscInt       *temp_global_dofs;
3057   PetscScalar    globalsum;
3058   PetscInt       i,j,s;
3059   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3060   PetscMPIInt    rank_prec_comm,size_prec_comm;
3061   PetscInt       *dof_sizes,*dof_displs;
3062   PetscBool      first_found;
3063   PetscErrorCode ierr;
3064 
3065   PetscFunctionBegin;
3066   /* mpi buffers */
3067   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3068   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3069   j = ( !rank_prec_comm ? size_prec_comm : 0);
3070   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3071   /* get maximum size of subset */
3072   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3073   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3074   max_local = 0;
3075   for (i=0;i<n_local_dofs;i++) {
3076     if (max_local < temp_global_dofs[i] ) {
3077       max_local = temp_global_dofs[i];
3078     }
3079   }
3080   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3081   max_global++;
3082   max_local = 0;
3083   for (i=0;i<n_local_dofs;i++) {
3084     if (max_local < local_dofs[i] ) {
3085       max_local = local_dofs[i];
3086     }
3087   }
3088   max_local++;
3089   /* allocate workspace */
3090   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3091   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3092   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3093   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3094   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3095   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3096   /* create scatter */
3097   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3098   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3099   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3100   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3101   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3102   /* init array */
3103   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3104   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3105   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3106   if (local_dofs_mult) {
3107     for (i=0;i<n_local_dofs;i++) {
3108       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3109     }
3110   } else {
3111     for (i=0;i<n_local_dofs;i++) {
3112       array[local_dofs[i]]=1.0;
3113     }
3114   }
3115   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3116   /* scatter into global vec and get total number of global dofs */
3117   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3118   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3119   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3120   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3121   /* Fill global_vec with cumulative function for global numbering */
3122   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3123   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3124   nlocals = 0;
3125   first_index = -1;
3126   first_found = PETSC_FALSE;
3127   for (i=0;i<s;i++) {
3128     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3129       first_found = PETSC_TRUE;
3130       first_index = i;
3131     }
3132     nlocals += (PetscInt)PetscRealPart(array[i]);
3133   }
3134   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3135   if (!rank_prec_comm) {
3136     dof_displs[0]=0;
3137     for (i=1;i<size_prec_comm;i++) {
3138       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3139     }
3140   }
3141   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3142   if (first_found) {
3143     array[first_index] += (PetscScalar)nlocals;
3144     old_index = first_index;
3145     for (i=first_index+1;i<s;i++) {
3146       if (PetscRealPart(array[i]) > 0.1) {
3147         array[i] += array[old_index];
3148         old_index = i;
3149       }
3150     }
3151   }
3152   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3153   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3154   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3155   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3156   /* get global ordering of local dofs */
3157   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3158   if (local_dofs_mult) {
3159     for (i=0;i<n_local_dofs;i++) {
3160       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3161     }
3162   } else {
3163     for (i=0;i<n_local_dofs;i++) {
3164       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3165     }
3166   }
3167   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3168   /* free workspace */
3169   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3170   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3171   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3172   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3173   /* return pointer to global ordering of local dofs */
3174   *global_numbering_subset = temp_global_dofs;
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 #undef __FUNCT__
3179 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3180 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3181 {
3182   PetscInt       i,j;
3183   PetscScalar    *alphas;
3184   PetscErrorCode ierr;
3185 
3186   PetscFunctionBegin;
3187   /* this implements stabilized Gram-Schmidt */
3188   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3189   for (i=0;i<n;i++) {
3190     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3191     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3192     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3193   }
3194   ierr = PetscFree(alphas);CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatISGetSubassemblingPattern"
3200 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3201 {
3202   Mat             subdomain_adj;
3203   IS              new_ranks,ranks_send_to;
3204   MatPartitioning partitioner;
3205   Mat_IS          *matis;
3206   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3207   PetscInt        prank;
3208   PetscMPIInt     size,rank,color;
3209   PetscInt        *xadj,*adjncy,*oldranks;
3210   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3211   PetscInt        i,local_size,threshold=0;
3212   PetscErrorCode  ierr;
3213   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3214   PetscSubcomm    subcomm;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3218   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3219   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3220 
3221   /* Get info on mapping */
3222   matis = (Mat_IS*)(mat->data);
3223   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3224   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3225 
3226   /* build local CSR graph of subdomains' connectivity */
3227   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3228   xadj[0] = 0;
3229   xadj[1] = PetscMax(n_neighs-1,0);
3230   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3231   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3232 
3233   if (threshold) {
3234     PetscInt xadj_count = 0;
3235     for (i=1;i<n_neighs;i++) {
3236       if (n_shared[i] > threshold) {
3237         adjncy[xadj_count] = neighs[i];
3238         adjncy_wgt[xadj_count] = n_shared[i];
3239         xadj_count++;
3240       }
3241     }
3242     xadj[1] = xadj_count;
3243   } else {
3244     if (xadj[1]) {
3245       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3246       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3247     }
3248   }
3249   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3250   if (use_square) {
3251     for (i=0;i<xadj[1];i++) {
3252       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3253     }
3254   }
3255   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3256 
3257   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3258 
3259   /*
3260     Restrict work on active processes only.
3261   */
3262   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3263   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3264   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3265   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3266   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3267   if (color) {
3268     ierr = PetscFree(xadj);CHKERRQ(ierr);
3269     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3270     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3271   } else {
3272     PetscInt coarsening_ratio;
3273     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3274     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3275     prank = rank;
3276     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3277     /*
3278     for (i=0;i<size;i++) {
3279       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3280     }
3281     */
3282     for (i=0;i<xadj[1];i++) {
3283       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3284     }
3285     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3286     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3287     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3288 
3289     /* Partition */
3290     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3291     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3292     if (use_vwgt) {
3293       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3294       v_wgt[0] = local_size;
3295       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3296     }
3297     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3298     coarsening_ratio = size/n_subdomains;
3299     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3300     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3301     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3302     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3303 
3304     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3305     if (contiguous) {
3306       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3307     } else {
3308       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3309     }
3310     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3311     /* clean up */
3312     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3313     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3314     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3315     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3316   }
3317   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3318 
3319   /* assemble parallel IS for sends */
3320   i = 1;
3321   if (color) i=0;
3322   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3323 
3324   /* get back IS */
3325   *is_sends = ranks_send_to;
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3330 
3331 #undef __FUNCT__
3332 #define __FUNCT__ "MatISSubassemble"
3333 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3334 {
3335   Mat                    local_mat;
3336   Mat_IS                 *matis;
3337   IS                     is_sends_internal;
3338   PetscInt               rows,cols,new_local_rows;
3339   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3340   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3341   ISLocalToGlobalMapping l2gmap;
3342   PetscInt*              l2gmap_indices;
3343   const PetscInt*        is_indices;
3344   MatType                new_local_type;
3345   /* buffers */
3346   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3347   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3348   PetscInt               *recv_buffer_idxs_local;
3349   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3350   /* MPI */
3351   MPI_Comm               comm,comm_n;
3352   PetscSubcomm           subcomm;
3353   PetscMPIInt            n_sends,n_recvs,commsize;
3354   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3355   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3356   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3357   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3358   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3359   PetscErrorCode         ierr;
3360 
3361   PetscFunctionBegin;
3362   /* TODO: add missing checks */
3363   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3364   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3365   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3366   PetscValidLogicalCollectiveInt(mat,nis,7);
3367   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3368   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3369   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3370   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3371   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3372   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3373   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3374   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3375     PetscInt mrows,mcols,mnrows,mncols;
3376     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3377     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3378     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3379     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3380     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3381     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3382   }
3383   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3384   PetscValidLogicalCollectiveInt(mat,bs,0);
3385   /* prepare IS for sending if not provided */
3386   if (!is_sends) {
3387     PetscBool pcontig = PETSC_TRUE;
3388     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3389     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3390   } else {
3391     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3392     is_sends_internal = is_sends;
3393   }
3394 
3395   /* get pointer of MATIS data */
3396   matis = (Mat_IS*)mat->data;
3397 
3398   /* get comm */
3399   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3400 
3401   /* compute number of sends */
3402   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3403   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3404 
3405   /* compute number of receives */
3406   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3407   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3408   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3409   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3410   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3411   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3412   ierr = PetscFree(iflags);CHKERRQ(ierr);
3413 
3414   /* restrict comm if requested */
3415   subcomm = 0;
3416   destroy_mat = PETSC_FALSE;
3417   if (restrict_comm) {
3418     PetscMPIInt color,subcommsize;
3419 
3420     color = 0;
3421     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3422     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3423     subcommsize = commsize - subcommsize;
3424     /* check if reuse has been requested */
3425     if (reuse == MAT_REUSE_MATRIX) {
3426       if (*mat_n) {
3427         PetscMPIInt subcommsize2;
3428         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3429         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3430         comm_n = PetscObjectComm((PetscObject)*mat_n);
3431       } else {
3432         comm_n = PETSC_COMM_SELF;
3433       }
3434     } else { /* MAT_INITIAL_MATRIX */
3435       PetscMPIInt rank;
3436 
3437       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3438       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3439       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3440       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3441       comm_n = PetscSubcommChild(subcomm);
3442     }
3443     /* flag to destroy *mat_n if not significative */
3444     if (color) destroy_mat = PETSC_TRUE;
3445   } else {
3446     comm_n = comm;
3447   }
3448 
3449   /* prepare send/receive buffers */
3450   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3451   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3452   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3453   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3454   if (nis) {
3455     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3456   }
3457 
3458   /* Get data from local matrices */
3459   if (!isdense) {
3460     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3461     /* TODO: See below some guidelines on how to prepare the local buffers */
3462     /*
3463        send_buffer_vals should contain the raw values of the local matrix
3464        send_buffer_idxs should contain:
3465        - MatType_PRIVATE type
3466        - PetscInt        size_of_l2gmap
3467        - PetscInt        global_row_indices[size_of_l2gmap]
3468        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3469     */
3470   } else {
3471     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3472     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3473     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3474     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3475     send_buffer_idxs[1] = i;
3476     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3477     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3478     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3479     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3480     for (i=0;i<n_sends;i++) {
3481       ilengths_vals[is_indices[i]] = len*len;
3482       ilengths_idxs[is_indices[i]] = len+2;
3483     }
3484   }
3485   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3486   /* additional is (if any) */
3487   if (nis) {
3488     PetscMPIInt psum;
3489     PetscInt j;
3490     for (j=0,psum=0;j<nis;j++) {
3491       PetscInt plen;
3492       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3493       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3494       psum += len+1; /* indices + lenght */
3495     }
3496     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3497     for (j=0,psum=0;j<nis;j++) {
3498       PetscInt plen;
3499       const PetscInt *is_array_idxs;
3500       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3501       send_buffer_idxs_is[psum] = plen;
3502       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3503       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3504       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3505       psum += plen+1; /* indices + lenght */
3506     }
3507     for (i=0;i<n_sends;i++) {
3508       ilengths_idxs_is[is_indices[i]] = psum;
3509     }
3510     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3511   }
3512 
3513   buf_size_idxs = 0;
3514   buf_size_vals = 0;
3515   buf_size_idxs_is = 0;
3516   for (i=0;i<n_recvs;i++) {
3517     buf_size_idxs += (PetscInt)olengths_idxs[i];
3518     buf_size_vals += (PetscInt)olengths_vals[i];
3519     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3520   }
3521   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3522   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3523   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3524 
3525   /* get new tags for clean communications */
3526   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3527   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3528   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3529 
3530   /* allocate for requests */
3531   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3532   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3533   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3534   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3535   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3536   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3537 
3538   /* communications */
3539   ptr_idxs = recv_buffer_idxs;
3540   ptr_vals = recv_buffer_vals;
3541   ptr_idxs_is = recv_buffer_idxs_is;
3542   for (i=0;i<n_recvs;i++) {
3543     source_dest = onodes[i];
3544     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3545     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3546     ptr_idxs += olengths_idxs[i];
3547     ptr_vals += olengths_vals[i];
3548     if (nis) {
3549       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3550       ptr_idxs_is += olengths_idxs_is[i];
3551     }
3552   }
3553   for (i=0;i<n_sends;i++) {
3554     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3555     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3556     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3557     if (nis) {
3558       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3559     }
3560   }
3561   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3562   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3563 
3564   /* assemble new l2g map */
3565   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3566   ptr_idxs = recv_buffer_idxs;
3567   new_local_rows = 0;
3568   for (i=0;i<n_recvs;i++) {
3569     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3570     ptr_idxs += olengths_idxs[i];
3571   }
3572   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3573   ptr_idxs = recv_buffer_idxs;
3574   new_local_rows = 0;
3575   for (i=0;i<n_recvs;i++) {
3576     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3577     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3578     ptr_idxs += olengths_idxs[i];
3579   }
3580   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3581   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3582   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3583 
3584   /* infer new local matrix type from received local matrices type */
3585   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3586   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3587   if (n_recvs) {
3588     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3589     ptr_idxs = recv_buffer_idxs;
3590     for (i=0;i<n_recvs;i++) {
3591       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3592         new_local_type_private = MATAIJ_PRIVATE;
3593         break;
3594       }
3595       ptr_idxs += olengths_idxs[i];
3596     }
3597     switch (new_local_type_private) {
3598       case MATDENSE_PRIVATE:
3599         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3600           new_local_type = MATSEQAIJ;
3601           bs = 1;
3602         } else { /* if I receive only 1 dense matrix */
3603           new_local_type = MATSEQDENSE;
3604           bs = 1;
3605         }
3606         break;
3607       case MATAIJ_PRIVATE:
3608         new_local_type = MATSEQAIJ;
3609         bs = 1;
3610         break;
3611       case MATBAIJ_PRIVATE:
3612         new_local_type = MATSEQBAIJ;
3613         break;
3614       case MATSBAIJ_PRIVATE:
3615         new_local_type = MATSEQSBAIJ;
3616         break;
3617       default:
3618         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3619         break;
3620     }
3621   } else { /* by default, new_local_type is seqdense */
3622     new_local_type = MATSEQDENSE;
3623     bs = 1;
3624   }
3625 
3626   /* create MATIS object if needed */
3627   if (reuse == MAT_INITIAL_MATRIX) {
3628     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3629     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3630   } else {
3631     /* it also destroys the local matrices */
3632     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3633   }
3634   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3635   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3636 
3637   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3638 
3639   /* Global to local map of received indices */
3640   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3641   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3642   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3643 
3644   /* restore attributes -> type of incoming data and its size */
3645   buf_size_idxs = 0;
3646   for (i=0;i<n_recvs;i++) {
3647     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3648     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3649     buf_size_idxs += (PetscInt)olengths_idxs[i];
3650   }
3651   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3652 
3653   /* set preallocation */
3654   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3655   if (!newisdense) {
3656     PetscInt *new_local_nnz=0;
3657 
3658     ptr_vals = recv_buffer_vals;
3659     ptr_idxs = recv_buffer_idxs_local;
3660     if (n_recvs) {
3661       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3662     }
3663     for (i=0;i<n_recvs;i++) {
3664       PetscInt j;
3665       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3666         for (j=0;j<*(ptr_idxs+1);j++) {
3667           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3668         }
3669       } else {
3670         /* TODO */
3671       }
3672       ptr_idxs += olengths_idxs[i];
3673     }
3674     if (new_local_nnz) {
3675       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3676       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3677       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3678       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3679       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3680       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3681     } else {
3682       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3683     }
3684     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3685   } else {
3686     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3687   }
3688 
3689   /* set values */
3690   ptr_vals = recv_buffer_vals;
3691   ptr_idxs = recv_buffer_idxs_local;
3692   for (i=0;i<n_recvs;i++) {
3693     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3694       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3695       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3696       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3697       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3698       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3699     } else {
3700       /* TODO */
3701     }
3702     ptr_idxs += olengths_idxs[i];
3703     ptr_vals += olengths_vals[i];
3704   }
3705   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3706   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3710   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3711 
3712 #if 0
3713   if (!restrict_comm) { /* check */
3714     Vec       lvec,rvec;
3715     PetscReal infty_error;
3716 
3717     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3718     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3719     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3720     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3721     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3722     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3723     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3724     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3725     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3726   }
3727 #endif
3728 
3729   /* assemble new additional is (if any) */
3730   if (nis) {
3731     PetscInt **temp_idxs,*count_is,j,psum;
3732 
3733     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3734     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3735     ptr_idxs = recv_buffer_idxs_is;
3736     psum = 0;
3737     for (i=0;i<n_recvs;i++) {
3738       for (j=0;j<nis;j++) {
3739         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3740         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3741         psum += plen;
3742         ptr_idxs += plen+1; /* shift pointer to received data */
3743       }
3744     }
3745     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3746     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3747     for (i=1;i<nis;i++) {
3748       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3749     }
3750     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3751     ptr_idxs = recv_buffer_idxs_is;
3752     for (i=0;i<n_recvs;i++) {
3753       for (j=0;j<nis;j++) {
3754         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3755         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3756         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3757         ptr_idxs += plen+1; /* shift pointer to received data */
3758       }
3759     }
3760     for (i=0;i<nis;i++) {
3761       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3762       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3763       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3764     }
3765     ierr = PetscFree(count_is);CHKERRQ(ierr);
3766     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3767     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3768   }
3769   /* free workspace */
3770   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3771   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3772   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3773   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3774   if (isdense) {
3775     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3776     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3777   } else {
3778     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3779   }
3780   if (nis) {
3781     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3782     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3783   }
3784   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3785   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3786   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3787   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3788   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3789   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3790   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3791   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3792   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3793   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3794   ierr = PetscFree(onodes);CHKERRQ(ierr);
3795   if (nis) {
3796     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3797     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3798     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3799   }
3800   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3801   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3802     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3803     for (i=0;i<nis;i++) {
3804       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3805     }
3806   }
3807   PetscFunctionReturn(0);
3808 }
3809 
3810 /* temporary hack into ksp private data structure */
3811 #include <petsc/private/kspimpl.h>
3812 
3813 #undef __FUNCT__
3814 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3815 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3816 {
3817   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3818   PC_IS                  *pcis = (PC_IS*)pc->data;
3819   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3820   MatNullSpace           CoarseNullSpace=NULL;
3821   ISLocalToGlobalMapping coarse_islg;
3822   IS                     coarse_is,*isarray;
3823   PetscInt               i,im_active=-1,active_procs=-1;
3824   PetscInt               nis,nisdofs,nisneu;
3825   PC                     pc_temp;
3826   PCType                 coarse_pc_type;
3827   KSPType                coarse_ksp_type;
3828   PetscBool              multilevel_requested,multilevel_allowed;
3829   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3830   Mat                    t_coarse_mat_is;
3831   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3832   PetscMPIInt            all_procs;
3833   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3834   PetscBool              compute_vecs = PETSC_FALSE;
3835   PetscScalar            *array;
3836   PetscErrorCode         ierr;
3837 
3838   PetscFunctionBegin;
3839   /* Assign global numbering to coarse dofs */
3840   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3841     PetscInt ocoarse_size;
3842     compute_vecs = PETSC_TRUE;
3843     ocoarse_size = pcbddc->coarse_size;
3844     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3845     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3846     /* see if we can avoid some work */
3847     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3848       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3849         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3850         coarse_reuse = PETSC_FALSE;
3851       } else { /* we can safely reuse already computed coarse matrix */
3852         coarse_reuse = PETSC_TRUE;
3853       }
3854     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3855       coarse_reuse = PETSC_FALSE;
3856     }
3857     /* reset any subassembling information */
3858     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3859     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3860   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3861     coarse_reuse = PETSC_TRUE;
3862   }
3863 
3864   /* count "active" (i.e. with positive local size) and "void" processes */
3865   im_active = !!(pcis->n);
3866   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3867   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3868   void_procs = all_procs-active_procs;
3869   csin_type_simple = PETSC_TRUE;
3870   redist = PETSC_FALSE;
3871   if (pcbddc->current_level && void_procs) {
3872     csin_ml = PETSC_TRUE;
3873     ncoarse_ml = void_procs;
3874     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3875     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3876       csin_ds = PETSC_TRUE;
3877       ncoarse_ds = pcbddc->redistribute_coarse;
3878       redist = PETSC_TRUE;
3879     } else {
3880       csin_ds = PETSC_TRUE;
3881       ncoarse_ds = active_procs;
3882       redist = PETSC_TRUE;
3883     }
3884   } else {
3885     csin_ml = PETSC_FALSE;
3886     ncoarse_ml = all_procs;
3887     if (void_procs) {
3888       csin_ds = PETSC_TRUE;
3889       ncoarse_ds = void_procs;
3890       csin_type_simple = PETSC_FALSE;
3891     } else {
3892       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3893         csin_ds = PETSC_TRUE;
3894         ncoarse_ds = pcbddc->redistribute_coarse;
3895         redist = PETSC_TRUE;
3896       } else {
3897         csin_ds = PETSC_FALSE;
3898         ncoarse_ds = all_procs;
3899       }
3900     }
3901   }
3902 
3903   /*
3904     test if we can go multilevel: three conditions must be satisfied:
3905     - we have not exceeded the number of levels requested
3906     - we can actually subassemble the active processes
3907     - we can find a suitable number of MPI processes where we can place the subassembled problem
3908   */
3909   multilevel_allowed = PETSC_FALSE;
3910   multilevel_requested = PETSC_FALSE;
3911   if (pcbddc->current_level < pcbddc->max_levels) {
3912     multilevel_requested = PETSC_TRUE;
3913     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3914       multilevel_allowed = PETSC_FALSE;
3915     } else {
3916       multilevel_allowed = PETSC_TRUE;
3917     }
3918   }
3919   /* determine number of process partecipating to coarse solver */
3920   if (multilevel_allowed) {
3921     ncoarse = ncoarse_ml;
3922     csin = csin_ml;
3923     redist = PETSC_FALSE;
3924   } else {
3925     ncoarse = ncoarse_ds;
3926     csin = csin_ds;
3927   }
3928 
3929   /* creates temporary l2gmap and IS for coarse indexes */
3930   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3931   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3932 
3933   /* creates temporary MATIS object for coarse matrix */
3934   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3935   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3936   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3937   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3938 #if 0
3939   {
3940     PetscViewer viewer;
3941     char filename[256];
3942     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3943     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3944     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3945     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3946     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3947   }
3948 #endif
3949   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3950   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3951   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3952   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3954 
3955   /* compute dofs splitting and neumann boundaries for coarse dofs */
3956   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3957     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3958     const PetscInt         *idxs;
3959     ISLocalToGlobalMapping tmap;
3960 
3961     /* create map between primal indices (in local representative ordering) and local primal numbering */
3962     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3963     /* allocate space for temporary storage */
3964     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3965     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3966     /* allocate for IS array */
3967     nisdofs = pcbddc->n_ISForDofsLocal;
3968     nisneu = !!pcbddc->NeumannBoundariesLocal;
3969     nis = nisdofs + nisneu;
3970     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3971     /* dofs splitting */
3972     for (i=0;i<nisdofs;i++) {
3973       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3974       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3975       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3976       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3977       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3978       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3979       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3980       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3981     }
3982     /* neumann boundaries */
3983     if (pcbddc->NeumannBoundariesLocal) {
3984       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3985       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3986       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3987       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3988       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3989       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3990       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3991       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3992     }
3993     /* free memory */
3994     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3995     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3996     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3997   } else {
3998     nis = 0;
3999     nisdofs = 0;
4000     nisneu = 0;
4001     isarray = NULL;
4002   }
4003   /* destroy no longer needed map */
4004   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4005 
4006   /* restrict on coarse candidates (if needed) */
4007   coarse_mat_is = NULL;
4008   if (csin) {
4009     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4010       if (redist) {
4011         PetscMPIInt rank;
4012         PetscInt    spc,n_spc_p1,dest[1],destsize;
4013 
4014         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4015         spc = active_procs/ncoarse;
4016         n_spc_p1 = active_procs%ncoarse;
4017         if (im_active) {
4018           destsize = 1;
4019           if (rank > n_spc_p1*(spc+1)-1) {
4020             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4021           } else {
4022             dest[0] = rank/(spc+1);
4023           }
4024         } else {
4025           destsize = 0;
4026         }
4027         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4028       } else if (csin_type_simple) {
4029         PetscMPIInt rank;
4030         PetscInt    issize,isidx;
4031 
4032         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4033         if (im_active) {
4034           issize = 1;
4035           isidx = (PetscInt)rank;
4036         } else {
4037           issize = 0;
4038           isidx = -1;
4039         }
4040         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4041       } else { /* get a suitable subassembling pattern from MATIS code */
4042         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4043       }
4044 
4045       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4046       if (!redist || ncoarse <= void_procs) {
4047         PetscInt ncoarse_cand,tissize,*nisindices;
4048         PetscInt *coarse_candidates;
4049         const PetscInt* tisindices;
4050 
4051         /* get coarse candidates' ranks in pc communicator */
4052         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4053         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4054         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4055           if (!coarse_candidates[i]) {
4056             coarse_candidates[ncoarse_cand++]=i;
4057           }
4058         }
4059         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4060 
4061 
4062         if (pcbddc->dbg_flag) {
4063           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4064           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4065           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4066           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4067           for (i=0;i<ncoarse_cand;i++) {
4068             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4069           }
4070           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4071           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4072         }
4073         /* shift the pattern on coarse candidates */
4074         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4075         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4076         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4077         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4078         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4079         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4080         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4081       }
4082       if (pcbddc->dbg_flag) {
4083         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4084         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4085         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4086         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4087       }
4088     }
4089     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4090     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4091   } else {
4092     if (pcbddc->dbg_flag) {
4093       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4094       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4095       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4096     }
4097     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4098     coarse_mat_is = t_coarse_mat_is;
4099   }
4100 
4101   /* create local to global scatters for coarse problem */
4102   if (compute_vecs) {
4103     PetscInt lrows;
4104     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4105     if (coarse_mat_is) {
4106       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4107     } else {
4108       lrows = 0;
4109     }
4110     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4111     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4112     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4113     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4114     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4115   }
4116   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4117   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4118 
4119   /* set defaults for coarse KSP and PC */
4120   if (multilevel_allowed) {
4121     coarse_ksp_type = KSPRICHARDSON;
4122     coarse_pc_type = PCBDDC;
4123   } else {
4124     coarse_ksp_type = KSPPREONLY;
4125     coarse_pc_type = PCREDUNDANT;
4126   }
4127 
4128   /* print some info if requested */
4129   if (pcbddc->dbg_flag) {
4130     if (!multilevel_allowed) {
4131       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4132       if (multilevel_requested) {
4133         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4134       } else if (pcbddc->max_levels) {
4135         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4136       }
4137       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4138     }
4139   }
4140 
4141   /* create the coarse KSP object only once with defaults */
4142   if (coarse_mat_is) {
4143     MatReuse coarse_mat_reuse;
4144     PetscViewer dbg_viewer = NULL;
4145     if (pcbddc->dbg_flag) {
4146       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4147       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4148     }
4149     if (!pcbddc->coarse_ksp) {
4150       char prefix[256],str_level[16];
4151       size_t len;
4152       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4153       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4154       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4155       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4156       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4157       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4158       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4159       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4160       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4161       /* prefix */
4162       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4163       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4164       if (!pcbddc->current_level) {
4165         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4166         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4167       } else {
4168         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4169         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4170         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4171         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4172         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4173         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4174       }
4175       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4176       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4177       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4178       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4179       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4180       /* allow user customization */
4181       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4182     }
4183     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4184     if (nisdofs) {
4185       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4186       for (i=0;i<nisdofs;i++) {
4187         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4188       }
4189     }
4190     if (nisneu) {
4191       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4192       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4193     }
4194 
4195     /* get some info after set from options */
4196     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4197     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4198     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4199     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4200     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4201       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4202       isbddc = PETSC_FALSE;
4203     }
4204     if (isredundant) {
4205       KSP inner_ksp;
4206       PC inner_pc;
4207       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4208       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4209       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4210     }
4211 
4212     /* assemble coarse matrix */
4213     if (coarse_reuse) {
4214       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4215       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4216       coarse_mat_reuse = MAT_REUSE_MATRIX;
4217     } else {
4218       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4219     }
4220     if (isbddc || isnn) {
4221       if (pcbddc->coarsening_ratio > 1) {
4222         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4223           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4224           if (pcbddc->dbg_flag) {
4225             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4226             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4227             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4228             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4229           }
4230         }
4231         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4232       } else {
4233         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4234         coarse_mat = coarse_mat_is;
4235       }
4236     } else {
4237       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4238     }
4239     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4240 
4241     /* propagate symmetry info to coarse matrix */
4242     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4243     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4244 
4245     /* set operators */
4246     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4247     if (pcbddc->dbg_flag) {
4248       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4249     }
4250   } else { /* processes non partecipating to coarse solver (if any) */
4251     coarse_mat = 0;
4252   }
4253   ierr = PetscFree(isarray);CHKERRQ(ierr);
4254 #if 0
4255   {
4256     PetscViewer viewer;
4257     char filename[256];
4258     sprintf(filename,"coarse_mat.m");
4259     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4260     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4261     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4262     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4263   }
4264 #endif
4265 
4266   /* Compute coarse null space (special handling by BDDC only) */
4267   if (pcbddc->NullSpace) {
4268     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4269   }
4270 
4271   if (pcbddc->coarse_ksp) {
4272     Vec crhs,csol;
4273     PetscBool ispreonly;
4274     if (CoarseNullSpace) {
4275       if (isbddc) {
4276         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4277       } else {
4278         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4279       }
4280     }
4281     /* setup coarse ksp */
4282     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4283     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4284     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4285     /* hack */
4286     if (!csol) {
4287       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4288     }
4289     if (!crhs) {
4290       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4291     }
4292     /* Check coarse problem if in debug mode or if solving with an iterative method */
4293     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4294     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4295       KSP       check_ksp;
4296       KSPType   check_ksp_type;
4297       PC        check_pc;
4298       Vec       check_vec,coarse_vec;
4299       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4300       PetscInt  its;
4301       PetscBool compute_eigs;
4302       PetscReal *eigs_r,*eigs_c;
4303       PetscInt  neigs;
4304       const char *prefix;
4305 
4306       /* Create ksp object suitable for estimation of extreme eigenvalues */
4307       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4308       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4309       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4310       if (ispreonly) {
4311         check_ksp_type = KSPPREONLY;
4312         compute_eigs = PETSC_FALSE;
4313       } else {
4314         check_ksp_type = KSPGMRES;
4315         compute_eigs = PETSC_TRUE;
4316       }
4317       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4318       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4319       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4320       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4321       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4322       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4323       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4324       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4325       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4326       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4327       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4328       /* create random vec */
4329       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4330       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4331       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4332       if (CoarseNullSpace) {
4333         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4334       }
4335       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4336       /* solve coarse problem */
4337       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4338       if (CoarseNullSpace) {
4339         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4340       }
4341       /* set eigenvalue estimation if preonly has not been requested */
4342       if (compute_eigs) {
4343         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4344         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4345         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4346         lambda_max = eigs_r[neigs-1];
4347         lambda_min = eigs_r[0];
4348         if (pcbddc->use_coarse_estimates) {
4349           if (lambda_max>lambda_min) {
4350             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4351             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4352           }
4353         }
4354       }
4355 
4356       /* check coarse problem residual error */
4357       if (pcbddc->dbg_flag) {
4358         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4359         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4360         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4361         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4362         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4363         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4364         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4365         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4366         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4367         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4368         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4369         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4370         if (compute_eigs) {
4371           PetscReal lambda_max_s,lambda_min_s;
4372           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4373           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4374           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4375           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4376           for (i=0;i<neigs;i++) {
4377             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4378           }
4379         }
4380         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4381         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4382       }
4383       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4384       if (compute_eigs) {
4385         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4386         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4387       }
4388     }
4389   }
4390   /* print additional info */
4391   if (pcbddc->dbg_flag) {
4392     /* waits until all processes reaches this point */
4393     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4394     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4395     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4396   }
4397 
4398   /* free memory */
4399   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4400   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4401   PetscFunctionReturn(0);
4402 }
4403 
4404 #undef __FUNCT__
4405 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4406 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4407 {
4408   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4409   PC_IS*         pcis = (PC_IS*)pc->data;
4410   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4411   PetscInt       i,local_size,coarse_size=0;
4412   PetscInt       *local_primal_indices=NULL;
4413   PetscInt       *t_local_primal_indices=NULL;
4414   PetscErrorCode ierr;
4415 
4416   PetscFunctionBegin;
4417   /* Compute global number of coarse dofs */
4418   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4419     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4420   }
4421   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4422   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4423   local_size = 0;
4424   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4425     PetscInt j;
4426     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4427   }
4428   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4429   if (local_size != pcbddc->local_primal_size) {
4430     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4431   }
4432 
4433   /* check numbering */
4434   if (pcbddc->dbg_flag) {
4435     PetscScalar coarsesum,*array;
4436     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4437 
4438     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4439     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4440     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4441     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4442     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4443     for (i=0;i<pcbddc->local_primal_size;i++) {
4444       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4445     }
4446     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4447     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4448     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4449     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4450     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4451     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4452     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4453     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4454     for (i=0;i<pcis->n;i++) {
4455       if (array[i] == 1.0) {
4456         set_error = PETSC_TRUE;
4457         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4458       }
4459     }
4460     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4461     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4462     for (i=0;i<pcis->n;i++) {
4463       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4464     }
4465     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4466     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4467     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4468     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4469     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4470     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4471     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4472       PetscInt *gidxs;
4473 
4474       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4475       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4476       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4477       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4478       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4479       for (i=0;i<pcbddc->local_primal_size;i++) {
4480         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4481       }
4482       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4483       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4484     }
4485     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4486     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4487   }
4488   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4489   /* get back data */
4490   *coarse_size_n = coarse_size;
4491   *local_primal_indices_n = local_primal_indices;
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #undef __FUNCT__
4496 #define __FUNCT__ "PCBDDCGlobalToLocal"
4497 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4498 {
4499   IS             localis_t;
4500   PetscInt       i,lsize,*idxs,n;
4501   PetscScalar    *vals;
4502   PetscErrorCode ierr;
4503 
4504   PetscFunctionBegin;
4505   /* get indices in local ordering exploiting local to global map */
4506   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4507   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4508   for (i=0;i<lsize;i++) vals[i] = 1.0;
4509   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4510   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4511   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4512   if (idxs) { /* multilevel guard */
4513     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4514   }
4515   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4516   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4517   ierr = PetscFree(vals);CHKERRQ(ierr);
4518   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4519   /* now compute set in local ordering */
4520   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4521   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4522   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4523   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4524   for (i=0,lsize=0;i<n;i++) {
4525     if (PetscRealPart(vals[i]) > 0.5) {
4526       lsize++;
4527     }
4528   }
4529   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4530   for (i=0,lsize=0;i<n;i++) {
4531     if (PetscRealPart(vals[i]) > 0.5) {
4532       idxs[lsize++] = i;
4533     }
4534   }
4535   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4536   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4537   *localis = localis_t;
4538   PetscFunctionReturn(0);
4539 }
4540 
4541 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4542 #undef __FUNCT__
4543 #define __FUNCT__ "PCBDDCMatMult_Private"
4544 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4545 {
4546   PCBDDCChange_ctx change_ctx;
4547   PetscErrorCode   ierr;
4548 
4549   PetscFunctionBegin;
4550   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4551   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4552   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4553   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 #undef __FUNCT__
4558 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4559 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4560 {
4561   PCBDDCChange_ctx change_ctx;
4562   PetscErrorCode   ierr;
4563 
4564   PetscFunctionBegin;
4565   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4566   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4567   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4568   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4574 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4575 {
4576   PC_IS               *pcis=(PC_IS*)pc->data;
4577   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4578   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4579   Mat                 S_j;
4580   PetscInt            *used_xadj,*used_adjncy;
4581   PetscBool           free_used_adj;
4582   PetscErrorCode      ierr;
4583 
4584   PetscFunctionBegin;
4585   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4586   free_used_adj = PETSC_FALSE;
4587   if (pcbddc->sub_schurs_layers == -1) {
4588     used_xadj = NULL;
4589     used_adjncy = NULL;
4590   } else {
4591     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4592       used_xadj = pcbddc->mat_graph->xadj;
4593       used_adjncy = pcbddc->mat_graph->adjncy;
4594     } else if (pcbddc->computed_rowadj) {
4595       used_xadj = pcbddc->mat_graph->xadj;
4596       used_adjncy = pcbddc->mat_graph->adjncy;
4597     } else {
4598       PetscBool      flg_row=PETSC_FALSE;
4599       const PetscInt *xadj,*adjncy;
4600       PetscInt       nvtxs;
4601 
4602       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4603       if (flg_row) {
4604         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4605         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4606         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4607         free_used_adj = PETSC_TRUE;
4608       } else {
4609         pcbddc->sub_schurs_layers = -1;
4610         used_xadj = NULL;
4611         used_adjncy = NULL;
4612       }
4613       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4614     }
4615   }
4616 
4617   /* setup sub_schurs data */
4618   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4619   if (!sub_schurs->use_mumps) {
4620     /* pcbddc->ksp_D up to date only if not using MUMPS */
4621     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4622     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4623   } else {
4624     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4625   }
4626   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4627 
4628   /* free adjacency */
4629   if (free_used_adj) {
4630     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4631   }
4632   PetscFunctionReturn(0);
4633 }
4634 
4635 #undef __FUNCT__
4636 #define __FUNCT__ "PCBDDCInitSubSchurs"
4637 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4638 {
4639   PC_IS               *pcis=(PC_IS*)pc->data;
4640   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4641   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4642   PCBDDCGraph         graph;
4643   PetscErrorCode      ierr;
4644 
4645   PetscFunctionBegin;
4646   /* attach interface graph for determining subsets */
4647   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4648     IS verticesIS;
4649 
4650     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4651     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4654     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4655     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4656 /*
4657     if (pcbddc->dbg_flag) {
4658       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4659     }
4660 */
4661   } else {
4662     graph = pcbddc->mat_graph;
4663   }
4664 
4665   /* sub_schurs init */
4666   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4667 
4668   /* free graph struct */
4669   if (pcbddc->sub_schurs_rebuild) {
4670     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4671   }
4672   PetscFunctionReturn(0);
4673 }
4674