xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision fd14bc5192b9f932f071d2205eea28b1a41e3485)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   if (pcbddc->dbg_flag) {
36     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
37     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
38     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
39     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
40   }
41 
42   /* max size of subsets */
43   mss = 0;
44   for (i=0;i<sub_schurs->n_subs;i++) {
45     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
46       PetscInt subset_size;
47       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
48       mss = PetscMax(mss,subset_size);
49     }
50   }
51 
52   /* min/max and threshold */
53   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
54   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
55   nmax = PetscMax(nmin,nmax);
56   if (pcbddc->adaptive_threshold > 1.0) {
57     thresh = 1.0/pcbddc->adaptive_threshold;
58   } else {
59     thresh = 1.0;
60   }
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
91 #if defined(PETSC_USE_COMPLEX)
92       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
93 #else
94       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
95 #endif
96       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
97       ierr = PetscFPTrapPop();CHKERRQ(ierr);
98     } else {
99         /* TODO */
100     }
101   } else {
102     lwork = 0;
103   }
104 
105   nv = 0;
106   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
107     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
108   }
109   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
110   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
111                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
112 #if defined(PETSC_USE_COMPLEX)
113   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
114 #endif
115   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
116   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
117                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
118                       nv+cum,&pcbddc->adaptive_constraints_idxs,
119                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
120   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
121 
122   maxneigs = 0;
123   cum = cum2 = cumarray = 0;
124   if (sub_schurs->is_Ej_com) {
125     const PetscInt *idxs;
126 
127     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
128     for (cum=0;cum<nv;cum++) {
129       pcbddc->adaptive_constraints_n[cum] = 1;
130       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
131       pcbddc->adaptive_constraints_ptrs[cum] = cum;
132       pcbddc->adaptive_constraints_data[cum] = 1.0;
133     }
134     cum2 = cum;
135     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
136   }
137 
138   if (mss) { /* multilevel */
139     if (pcbddc->use_deluxe_scaling) {
140       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
141     } else {
142       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
143     }
144     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
145   }
146 
147   if (pcbddc->dbg_flag) {
148     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
149   }
150 
151   for (i=0;i<sub_schurs->n_subs;i++) {
152     PetscInt j,subset_size;
153 
154     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
155     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
156       const PetscInt *idxs;
157       PetscScalar    one = 1.0,scalar_zero = 0.0;
158       PetscReal      zero=0.0;
159       PetscBLASInt   B_N;
160 
161       /* S should be copied since we need it for deluxe scaling */
162       if (sub_schurs->is_hermitian) {
163         PetscInt j;
164         for (j=0;j<subset_size;j++) {
165           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
166         }
167         for (j=0;j<subset_size;j++) {
168           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169         }
170       } else {
171         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173       }
174       /* is always this the right matrix? */
175       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176 
177       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
178       /* St = Starray+cumarray; */
179 
180       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
181         PetscBLASInt B_itype = 1;
182         PetscBLASInt B_IL = 1, B_IU;
183         PetscReal    eps = -1.0; /* dlamch? */
184         PetscInt     nmin_s;
185 
186         /* ask for eigenvalues lower than thresh */
187         if (pcbddc->dbg_flag) {
188           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs);
189         }
190         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
191         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
192 #if defined(PETSC_USE_COMPLEX)
193         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
194 #else
195         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
196 #endif
197         ierr = PetscFPTrapPop();CHKERRQ(ierr);
198         if (B_ierr) {
199           if (B_ierr < 0 ) {
200             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
201           } else if (B_ierr <= B_N) {
202             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
203           } else {
204             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
205           }
206         }
207 
208         if (B_neigs > nmax) {
209           if (pcbddc->dbg_flag) {
210             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
211           }
212           B_neigs = nmax;
213         }
214 
215         nmin_s = PetscMin(nmin,B_N);
216         if (B_neigs < nmin_s) {
217           PetscBLASInt B_neigs2;
218 
219           B_IL = B_neigs + 1;
220           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
221           if (pcbddc->dbg_flag) {
222             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
223           }
224           if (sub_schurs->is_hermitian) {
225             PetscInt j;
226             for (j=0;j<subset_size;j++) {
227               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
228             }
229             for (j=0;j<subset_size;j++) {
230               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
231             }
232           } else {
233             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
234             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
235           }
236           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
237 #if defined(PETSC_USE_COMPLEX)
238           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
239 #else
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
241 #endif
242           ierr = PetscFPTrapPop();CHKERRQ(ierr);
243           B_neigs += B_neigs2;
244         }
245         if (B_ierr) {
246           if (B_ierr < 0 ) {
247             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
248           } else if (B_ierr <= B_N) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
250           } else {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
252           }
253         }
254         if (pcbddc->dbg_flag) {
255           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
256           for (j=0;j<B_neigs;j++) {
257             if (eigs[j] == 0.0) {
258               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
259             } else {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
261             }
262           }
263         }
264       } else {
265           /* TODO */
266       }
267       maxneigs = PetscMax(B_neigs,maxneigs);
268       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
269 
270       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
271       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
280           }
281         }
282       }
283       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
284       for (j=0;j<B_neigs;j++) {
285 #if 0
286         {
287           PetscBLASInt Blas_N,Blas_one = 1.0;
288           PetscScalar norm;
289           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
290           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
291           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
292             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
293           } else {
294             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
295           }
296           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
297         }
298 #endif
299         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
300         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
301         cum2 += subset_size;
302       }
303       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
304     }
305     /* shift for next computation */
306     cumarray += subset_size*subset_size;
307   }
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310   }
311   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
312   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
313 
314   if (mss) {
315     if (pcbddc->use_deluxe_scaling) {
316       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
317     } else {
318       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
319     }
320     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
321   }
322   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     MatType     checkmattype=MATSEQAIJ;
974     PetscReal   real_value;
975 
976     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
977     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
979     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
980     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
981     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
982     if (unsymmetric_check) {
983       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
984       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
985     }
986     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
987 
988     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
989     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
990     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
991     if (unsymmetric_check) {
992       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
993       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
994       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
995       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004     } else {
1005       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1006       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1007       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1008       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1009       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013     }
1014     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1015     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1017     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
1020 
1021     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1022     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1024     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1025     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1026     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1027     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1028     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1029     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1030     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1031     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1032     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1033     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1034     if (unsymmetric_check) {
1035       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1036       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1037     }
1038     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1039   }
1040 
1041   /* free memory */
1042   ierr = PetscFree(work);CHKERRQ(ierr);
1043   if (n_vertices) {
1044     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1045   }
1046   if (n_constraints) {
1047     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1048   }
1049   /* get back data */
1050   *coarse_submat_vals_n = coarse_submat_vals;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1056 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1057 {
1058   Mat            *work_mat;
1059   IS             isrow_s,iscol_s;
1060   PetscBool      rsorted,csorted;
1061   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1066   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1067   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1068   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1069 
1070   if (!rsorted) {
1071     const PetscInt *idxs;
1072     PetscInt *idxs_sorted,i;
1073 
1074     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1075     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1076     for (i=0;i<rsize;i++) {
1077       idxs_perm_r[i] = i;
1078     }
1079     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1080     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1081     for (i=0;i<rsize;i++) {
1082       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1083     }
1084     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1085     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1086   } else {
1087     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1088     isrow_s = isrow;
1089   }
1090 
1091   if (!csorted) {
1092     if (isrow == iscol) {
1093       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1094       iscol_s = isrow_s;
1095     } else {
1096       const PetscInt *idxs;
1097       PetscInt *idxs_sorted,i;
1098 
1099       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1100       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1101       for (i=0;i<csize;i++) {
1102         idxs_perm_c[i] = i;
1103       }
1104       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1105       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1106       for (i=0;i<csize;i++) {
1107         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1108       }
1109       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1110       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1111     }
1112   } else {
1113     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1114     iscol_s = iscol;
1115   }
1116 
1117   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1118 
1119   if (!rsorted || !csorted) {
1120     Mat      new_mat;
1121     IS       is_perm_r,is_perm_c;
1122 
1123     if (!rsorted) {
1124       PetscInt *idxs_r,i;
1125       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1126       for (i=0;i<rsize;i++) {
1127         idxs_r[idxs_perm_r[i]] = i;
1128       }
1129       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1130       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1131     } else {
1132       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1133     }
1134     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1135 
1136     if (!csorted) {
1137       if (isrow_s == iscol_s) {
1138         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1139         is_perm_c = is_perm_r;
1140       } else {
1141         PetscInt *idxs_c,i;
1142         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1143         for (i=0;i<csize;i++) {
1144           idxs_c[idxs_perm_c[i]] = i;
1145         }
1146         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1147         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1148       }
1149     } else {
1150       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1151     }
1152     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1153 
1154     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1155     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1156     work_mat[0] = new_mat;
1157     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1158     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1159   }
1160 
1161   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1162   *B = work_mat[0];
1163   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1164   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1165   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1171 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1172 {
1173   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1174   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1175   Mat            new_mat;
1176   IS             is_local,is_global;
1177   PetscInt       local_size;
1178   PetscBool      isseqaij;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1183   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1184   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1185   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1186   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1187   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1188   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1189 
1190   /* check */
1191   if (pcbddc->dbg_flag) {
1192     Vec       x,x_change;
1193     PetscReal error;
1194 
1195     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1196     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1197     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1198     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1201     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1204     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1205     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1206     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1207     ierr = VecDestroy(&x);CHKERRQ(ierr);
1208     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1209   }
1210 
1211   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1212   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1213   if (isseqaij) {
1214     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1215   } else {
1216     Mat work_mat;
1217     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1218     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1219     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1220   }
1221   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1222   /*
1223   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1224   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1225   */
1226   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1232 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1233 {
1234   PC_IS*         pcis = (PC_IS*)(pc->data);
1235   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1236   IS             is_aux1,is_aux2;
1237   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1238   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1239   PetscInt       vbs,bs;
1240   PetscBT        bitmask;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   /*
1245     No need to setup local scatters if
1246       - primal space is unchanged
1247         AND
1248       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1249         AND
1250       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1251   */
1252   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1253     PetscFunctionReturn(0);
1254   }
1255   /* destroy old objects */
1256   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1257   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1258   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1259   /* Set Non-overlapping dimensions */
1260   n_B = pcis->n_B; n_D = pcis->n - n_B;
1261   n_vertices = pcbddc->n_actual_vertices;
1262   /* create auxiliary bitmask */
1263   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1264   for (i=0;i<n_vertices;i++) {
1265     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1266   }
1267 
1268   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1269   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1270   for (i=0, n_R=0; i<pcis->n; i++) {
1271     if (!PetscBTLookup(bitmask,i)) {
1272       idx_R_local[n_R] = i;
1273       n_R++;
1274     }
1275   }
1276 
1277   /* Block code */
1278   vbs = 1;
1279   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1280   if (bs>1 && !(n_vertices%bs)) {
1281     PetscBool is_blocked = PETSC_TRUE;
1282     PetscInt  *vary;
1283     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1284     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1285     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1286     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1287     for (i=0; i<n_vertices; i++) {
1288       if (vary[i]!=0 && vary[i]!=bs) {
1289         is_blocked = PETSC_FALSE;
1290         break;
1291       }
1292     }
1293     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1294       vbs = bs;
1295       for (i=0;i<n_R/vbs;i++) {
1296         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1297       }
1298     }
1299     ierr = PetscFree(vary);CHKERRQ(ierr);
1300   }
1301   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1302   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1303 
1304   /* print some info if requested */
1305   if (pcbddc->dbg_flag) {
1306     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1307     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1308     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1309     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1310     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1311     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1312     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1313     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1314   }
1315 
1316   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1317   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1318   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1319   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1320   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1321   for (i=0; i<n_D; i++) {
1322     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1323   }
1324   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   for (i=0, j=0; i<n_R; i++) {
1326     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1327       aux_array1[j++] = i;
1328     }
1329   }
1330   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1331   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1332   for (i=0, j=0; i<n_B; i++) {
1333     if (!PetscBTLookup(bitmask,is_indices[i])) {
1334       aux_array2[j++] = i;
1335     }
1336   }
1337   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1338   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1339   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1340   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1341   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1342 
1343   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1344     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1345     for (i=0, j=0; i<n_R; i++) {
1346       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1347         aux_array1[j++] = i;
1348       }
1349     }
1350     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1352     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1353   }
1354   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1355   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1362 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1363 {
1364   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1365   PC_IS          *pcis = (PC_IS*)pc->data;
1366   PC             pc_temp;
1367   Mat            A_RR;
1368   MatReuse       reuse;
1369   PetscScalar    m_one = -1.0;
1370   PetscReal      value;
1371   PetscInt       n_D,n_R,ibs,mbs;
1372   PetscBool      use_exact,use_exact_reduced,issbaij;
1373   PetscErrorCode ierr;
1374   /* prefixes stuff */
1375   char           dir_prefix[256],neu_prefix[256],str_level[16];
1376   size_t         len;
1377 
1378   PetscFunctionBegin;
1379 
1380   /* compute prefixes */
1381   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1382   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1383   if (!pcbddc->current_level) {
1384     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1385     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1386     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1387     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1388   } else {
1389     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1390     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1391     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1392     len -= 15; /* remove "pc_bddc_coarse_" */
1393     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1394     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1395     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1396     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1397     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1398     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1399     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1400     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1401   }
1402 
1403   /* DIRICHLET PROBLEM */
1404   if (dirichlet) {
1405     /* Matrix for Dirichlet problem is pcis->A_II */
1406     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1407     if (!pcbddc->ksp_D) { /* create object if not yet build */
1408       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1409       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1410       /* default */
1411       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1412       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1413       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1414       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1415       if (issbaij) {
1416         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1417       } else {
1418         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1419       }
1420       /* Allow user's customization */
1421       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1422       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1423     }
1424     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1425     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1426     if (!n_D) {
1427       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1428       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1429     }
1430     /* Set Up KSP for Dirichlet problem of BDDC */
1431     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1432     /* set ksp_D into pcis data */
1433     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1434     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1435     pcis->ksp_D = pcbddc->ksp_D;
1436   }
1437 
1438   /* NEUMANN PROBLEM */
1439   A_RR = 0;
1440   if (neumann) {
1441     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1442     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1443     if (pcbddc->ksp_R) { /* already created ksp */
1444       PetscInt nn_R;
1445       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1446       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1447       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1448       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1449         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1450         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1451         reuse = MAT_INITIAL_MATRIX;
1452       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1453         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1454           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1455           reuse = MAT_INITIAL_MATRIX;
1456         } else { /* safe to reuse the matrix */
1457           reuse = MAT_REUSE_MATRIX;
1458         }
1459       }
1460       /* last check */
1461       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1462         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1463         reuse = MAT_INITIAL_MATRIX;
1464       }
1465     } else { /* first time, so we need to create the matrix */
1466       reuse = MAT_INITIAL_MATRIX;
1467     }
1468     /* extract A_RR */
1469     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1470     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1471     if (ibs != mbs) {
1472       Mat newmat;
1473       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1474       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1475       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1476     } else {
1477       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1478     }
1479     if (!pcbddc->ksp_R) { /* create object if not present */
1480       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1481       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1482       /* default */
1483       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1484       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1485       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1486       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1487       if (issbaij) {
1488         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1489       } else {
1490         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1491       }
1492       /* Allow user's customization */
1493       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1494       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1495     }
1496     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1497     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1498     if (!n_R) {
1499       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1500       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1501     }
1502     /* Set Up KSP for Neumann problem of BDDC */
1503     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1504   }
1505 
1506   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1507   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1508     if (pcbddc->dbg_flag) {
1509       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1510       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1511       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1512     }
1513     if (dirichlet) { /* Dirichlet */
1514       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1515       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1516       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1517       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1518       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1519       /* need to be adapted? */
1520       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1521       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1522       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1523       /* print info */
1524       if (pcbddc->dbg_flag) {
1525         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1526         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1527       }
1528       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1529         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1530       }
1531     }
1532     if (neumann) { /* Neumann */
1533       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1534       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1535       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1536       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1537       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1538       /* need to be adapted? */
1539       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1540       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1541       /* print info */
1542       if (pcbddc->dbg_flag) {
1543         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1544         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1545       }
1546       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1547         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1548       }
1549     }
1550   }
1551   /* free Neumann problem's matrix */
1552   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1558 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1559 {
1560   PetscErrorCode ierr;
1561   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1562 
1563   PetscFunctionBegin;
1564   if (applytranspose) {
1565     if (pcbddc->local_auxmat1) {
1566       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1567       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1568     }
1569     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1570   } else {
1571     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1572     if (pcbddc->local_auxmat1) {
1573       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1574       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1581 #undef __FUNCT__
1582 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1583 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1584 {
1585   PetscErrorCode ierr;
1586   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1587   PC_IS*            pcis = (PC_IS*)  (pc->data);
1588   const PetscScalar zero = 0.0;
1589 
1590   PetscFunctionBegin;
1591   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1592   if (applytranspose) {
1593     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1594     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1595   } else {
1596     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1597     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1598   }
1599   /* start communications from local primal nodes to rhs of coarse solver */
1600   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1601   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1602   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603 
1604   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1605   /* TODO remove null space when doing multilevel */
1606   if (pcbddc->coarse_ksp) {
1607     if (applytranspose) {
1608       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1609     } else {
1610       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1611     }
1612   }
1613 
1614   /* Local solution on R nodes */
1615   if (pcis->n) {
1616     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1617     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1618     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1619     if (pcbddc->switch_static) {
1620       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1621       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1622     }
1623     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1624     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1625     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) {
1628       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630     }
1631   }
1632 
1633   /* communications from coarse sol to local primal nodes */
1634   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1635   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1636 
1637   /* Sum contributions from two levels */
1638   if (applytranspose) {
1639     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1640     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1641   } else {
1642     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1643     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1644   }
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 #undef __FUNCT__
1649 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1650 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1651 {
1652   PetscErrorCode ierr;
1653   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1654   PetscScalar    *array;
1655   Vec            from,to;
1656 
1657   PetscFunctionBegin;
1658   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1659     from = pcbddc->coarse_vec;
1660     to = pcbddc->vec1_P;
1661     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1662       Vec tvec;
1663 
1664       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1665       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1666       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1667       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1668       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1669       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1670     }
1671   } else { /* from local to global -> put data in coarse right hand side */
1672     from = pcbddc->vec1_P;
1673     to = pcbddc->coarse_vec;
1674   }
1675   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1681 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1682 {
1683   PetscErrorCode ierr;
1684   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1685   PetscScalar    *array;
1686   Vec            from,to;
1687 
1688   PetscFunctionBegin;
1689   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1690     from = pcbddc->coarse_vec;
1691     to = pcbddc->vec1_P;
1692   } else { /* from local to global -> put data in coarse right hand side */
1693     from = pcbddc->vec1_P;
1694     to = pcbddc->coarse_vec;
1695   }
1696   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1697   if (smode == SCATTER_FORWARD) {
1698     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1699       Vec tvec;
1700 
1701       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1702       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1703       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1704       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1705     }
1706   } else {
1707     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1708      ierr = VecResetArray(from);CHKERRQ(ierr);
1709     }
1710   }
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 /* uncomment for testing purposes */
1715 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1716 #undef __FUNCT__
1717 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1718 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1719 {
1720   PetscErrorCode    ierr;
1721   PC_IS*            pcis = (PC_IS*)(pc->data);
1722   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1723   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1724   /* one and zero */
1725   PetscScalar       one=1.0,zero=0.0;
1726   /* space to store constraints and their local indices */
1727   PetscScalar       *temp_quadrature_constraint;
1728   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1729   /* iterators */
1730   PetscInt          i,j,k,total_counts,temp_start_ptr;
1731   /* BLAS integers */
1732   PetscBLASInt      lwork,lierr;
1733   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1734   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1735   /* reuse */
1736   PetscInt          olocal_primal_size;
1737   PetscInt          *oprimal_indices_local_idxs;
1738   /* change of basis */
1739   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1740   PetscBool         boolforchange,qr_needed;
1741   PetscBT           touched,change_basis,qr_needed_idx;
1742   /* auxiliary stuff */
1743   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1744   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1745   PetscScalar       *temp_quadrature_constraint_work=NULL;
1746   /* some quantities */
1747   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1748   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1749 
1750   PetscFunctionBegin;
1751   /* Destroy Mat objects computed previously */
1752   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1753   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1754 
1755   /* print some info */
1756   if (pcbddc->dbg_flag) {
1757     IS       vertices;
1758     PetscInt nv,nedges,nfaces;
1759     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1760     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1761     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1762     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1763     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1764     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1765     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1766     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1767     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1768   }
1769 
1770   if (!pcbddc->adaptive_selection) {
1771     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1772     MatNullSpace nearnullsp;
1773     const Vec    *nearnullvecs;
1774     Vec          *localnearnullsp;
1775     PetscScalar  *array;
1776     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1777     PetscBool    nnsp_has_cnst;
1778     /* LAPACK working arrays for SVD or POD */
1779     PetscBool    skip_lapack;
1780     PetscScalar  *work;
1781     PetscReal    *singular_vals;
1782 #if defined(PETSC_USE_COMPLEX)
1783     PetscReal    *rwork;
1784 #endif
1785 #if defined(PETSC_MISSING_LAPACK_GESVD)
1786     PetscScalar  *temp_basis,*correlation_mat;
1787 #else
1788     PetscBLASInt dummy_int;
1789     PetscScalar  dummy_scalar;
1790 #endif
1791 
1792     /* Get index sets for faces, edges and vertices from graph */
1793     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1794     /* free unneeded index sets */
1795     if (!pcbddc->use_vertices) {
1796       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1797     }
1798     if (!pcbddc->use_edges) {
1799       for (i=0;i<n_ISForEdges;i++) {
1800         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1801       }
1802       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1803       n_ISForEdges = 0;
1804     }
1805     if (!pcbddc->use_faces) {
1806       for (i=0;i<n_ISForFaces;i++) {
1807         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1808       }
1809       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1810       n_ISForFaces = 0;
1811     }
1812     /* HACKS (the following two blocks of code) */
1813     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1814       pcbddc->use_change_of_basis = PETSC_TRUE;
1815       if (!ISForEdges) {
1816         pcbddc->use_change_on_faces = PETSC_TRUE;
1817       }
1818     }
1819     if (pcbddc->NullSpace) {
1820       /* use_change_of_basis should be consistent among processors */
1821       PetscBool tbool[2],gbool[2];
1822       tbool [0] = pcbddc->use_change_of_basis;
1823       tbool [1] = pcbddc->use_change_on_faces;
1824       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1825       pcbddc->use_change_of_basis = gbool[0];
1826       pcbddc->use_change_on_faces = gbool[1];
1827     }
1828 
1829     /* check if near null space is attached to global mat */
1830     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1831     if (nearnullsp) {
1832       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1833       /* remove any stored info */
1834       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1835       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1836       /* store information for BDDC solver reuse */
1837       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1838       pcbddc->onearnullspace = nearnullsp;
1839       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1840       for (i=0;i<nnsp_size;i++) {
1841         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1842       }
1843     } else { /* if near null space is not provided BDDC uses constants by default */
1844       nnsp_size = 0;
1845       nnsp_has_cnst = PETSC_TRUE;
1846     }
1847     /* get max number of constraints on a single cc */
1848     max_constraints = nnsp_size;
1849     if (nnsp_has_cnst) max_constraints++;
1850 
1851     /*
1852          Evaluate maximum storage size needed by the procedure
1853          - temp_indices will contain start index of each constraint stored as follows
1854          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1855          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1856          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1857                                                                                                                                                            */
1858     total_counts = n_ISForFaces+n_ISForEdges;
1859     total_counts *= max_constraints;
1860     n_vertices = 0;
1861     if (ISForVertices) {
1862       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1863     }
1864     total_counts += n_vertices;
1865     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1866     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1867     total_counts = 0;
1868     max_size_of_constraint = 0;
1869     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1870       if (i<n_ISForEdges) {
1871         used_IS = &ISForEdges[i];
1872       } else {
1873         used_IS = &ISForFaces[i-n_ISForEdges];
1874       }
1875       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1876       total_counts += j;
1877       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1878     }
1879     total_counts *= max_constraints;
1880     total_counts += n_vertices;
1881     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1882     /* get local part of global near null space vectors */
1883     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1884     for (k=0;k<nnsp_size;k++) {
1885       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1886       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1887       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1888     }
1889 
1890     /* whether or not to skip lapack calls */
1891     skip_lapack = PETSC_TRUE;
1892     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1893 
1894     /* allocate some auxiliary stuff */
1895     if (!skip_lapack || pcbddc->use_qr_single) {
1896       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1897     } else {
1898       gidxs = NULL;
1899       permutation = NULL;
1900       temp_indices_to_constraint_work = NULL;
1901       temp_quadrature_constraint_work = NULL;
1902     }
1903 
1904     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1905     if (!skip_lapack) {
1906       PetscScalar temp_work;
1907 
1908 #if defined(PETSC_MISSING_LAPACK_GESVD)
1909       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1910       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1911       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1912       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1913 #if defined(PETSC_USE_COMPLEX)
1914       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1915 #endif
1916       /* now we evaluate the optimal workspace using query with lwork=-1 */
1917       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1918       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1919       lwork = -1;
1920       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1921 #if !defined(PETSC_USE_COMPLEX)
1922       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1923 #else
1924       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1925 #endif
1926       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1927       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1928 #else /* on missing GESVD */
1929       /* SVD */
1930       PetscInt max_n,min_n;
1931       max_n = max_size_of_constraint;
1932       min_n = max_constraints;
1933       if (max_size_of_constraint < max_constraints) {
1934         min_n = max_size_of_constraint;
1935         max_n = max_constraints;
1936       }
1937       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1938 #if defined(PETSC_USE_COMPLEX)
1939       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1940 #endif
1941       /* now we evaluate the optimal workspace using query with lwork=-1 */
1942       lwork = -1;
1943       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1944       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1945       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1946       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1947 #if !defined(PETSC_USE_COMPLEX)
1948       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1949 #else
1950       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1951 #endif
1952       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1953       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1954 #endif /* on missing GESVD */
1955       /* Allocate optimal workspace */
1956       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1957       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1958     }
1959     /* Now we can loop on constraining sets */
1960     total_counts = 0;
1961     temp_indices[0] = 0;
1962     /* vertices */
1963     if (ISForVertices) {
1964       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1965       if (nnsp_has_cnst) { /* consider all vertices */
1966         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1967         for (i=0;i<n_vertices;i++) {
1968           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1969           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1970           total_counts++;
1971         }
1972       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1973         PetscBool used_vertex;
1974         for (i=0;i<n_vertices;i++) {
1975           used_vertex = PETSC_FALSE;
1976           k = 0;
1977           while (!used_vertex && k<nnsp_size) {
1978             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1979             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1980               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1981               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1982               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1983               total_counts++;
1984               used_vertex = PETSC_TRUE;
1985             }
1986             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1987             k++;
1988           }
1989         }
1990       }
1991       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1992       n_vertices = total_counts;
1993     }
1994 
1995     /* edges and faces */
1996     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1997       if (ncc<n_ISForEdges) {
1998         used_IS = &ISForEdges[ncc];
1999         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2000       } else {
2001         used_IS = &ISForFaces[ncc-n_ISForEdges];
2002         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2003       }
2004       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2005       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2006       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2007       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2008       /* change of basis should not be performed on local periodic nodes */
2009       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2010       if (nnsp_has_cnst) {
2011         PetscScalar quad_value;
2012         temp_constraints++;
2013         if (!pcbddc->use_nnsp_true) {
2014           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2015         } else {
2016           quad_value = 1.0;
2017         }
2018         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2019         for (j=0;j<size_of_constraint;j++) {
2020           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2021         }
2022         /* sort by global ordering if using lapack subroutines (not needed!) */
2023         if (!skip_lapack || pcbddc->use_qr_single) {
2024           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2025           for (j=0;j<size_of_constraint;j++) {
2026             permutation[j]=j;
2027           }
2028           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2029           for (j=0;j<size_of_constraint;j++) {
2030             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2031           }
2032           for (j=0;j<size_of_constraint;j++) {
2033             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2034             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2035           }
2036           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2037           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2038         }
2039         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2040         total_counts++;
2041       }
2042       for (k=0;k<nnsp_size;k++) {
2043         PetscReal real_value;
2044         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2045         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2046         for (j=0;j<size_of_constraint;j++) {
2047           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2048         }
2049         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2050         /* check if array is null on the connected component */
2051         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2052         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2053         if (real_value > 0.0) { /* keep indices and values */
2054           /* sort by global ordering if using lapack subroutines */
2055           if (!skip_lapack || pcbddc->use_qr_single) {
2056             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2057             for (j=0;j<size_of_constraint;j++) {
2058               permutation[j]=j;
2059             }
2060             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2061             for (j=0;j<size_of_constraint;j++) {
2062               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2063               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2064             }
2065             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2066             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2067           }
2068           temp_constraints++;
2069           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2070           total_counts++;
2071         }
2072       }
2073       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2074       valid_constraints = temp_constraints;
2075       if (!pcbddc->use_nnsp_true && temp_constraints) {
2076         if (temp_constraints == 1) { /* just normalize the constraint */
2077           PetscScalar norm;
2078           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2079           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2080           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2081           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2082         } else { /* perform SVD */
2083           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2084 
2085 #if defined(PETSC_MISSING_LAPACK_GESVD)
2086           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2087              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2088              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2089                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2090                 from that computed using LAPACKgesvd
2091              -> This is due to a different computation of eigenvectors in LAPACKheev
2092              -> The quality of the POD-computed basis will be the same */
2093           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2094           /* Store upper triangular part of correlation matrix */
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2096           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2097           for (j=0;j<temp_constraints;j++) {
2098             for (k=0;k<j+1;k++) {
2099               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2100             }
2101           }
2102           /* compute eigenvalues and eigenvectors of correlation matrix */
2103           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2104           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2105 #if !defined(PETSC_USE_COMPLEX)
2106           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2107 #else
2108           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2109 #endif
2110           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2111           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2112           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2113           j = 0;
2114           while (j < temp_constraints && singular_vals[j] < tol) j++;
2115           total_counts = total_counts-j;
2116           valid_constraints = temp_constraints-j;
2117           /* scale and copy POD basis into used quadrature memory */
2118           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2119           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2120           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2121           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2122           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2123           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2124           if (j<temp_constraints) {
2125             PetscInt ii;
2126             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2127             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2128             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2129             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130             for (k=0;k<temp_constraints-j;k++) {
2131               for (ii=0;ii<size_of_constraint;ii++) {
2132                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2133               }
2134             }
2135           }
2136 #else  /* on missing GESVD */
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2138           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2139           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2140           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2141 #if !defined(PETSC_USE_COMPLEX)
2142           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2143 #else
2144           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2145 #endif
2146           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2147           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2148           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2149           k = temp_constraints;
2150           if (k > size_of_constraint) k = size_of_constraint;
2151           j = 0;
2152           while (j < k && singular_vals[k-j-1] < tol) j++;
2153           valid_constraints = k-j;
2154           total_counts = total_counts-temp_constraints+valid_constraints;
2155 #endif /* on missing GESVD */
2156         }
2157       }
2158       /* setting change_of_basis flag is safe now */
2159       if (boolforchange) {
2160         for (j=0;j<valid_constraints;j++) {
2161           PetscBTSet(change_basis,total_counts-j-1);
2162         }
2163       }
2164     }
2165     /* free workspace */
2166     if (!skip_lapack || pcbddc->use_qr_single) {
2167       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2168     }
2169     if (!skip_lapack) {
2170       ierr = PetscFree(work);CHKERRQ(ierr);
2171 #if defined(PETSC_USE_COMPLEX)
2172       ierr = PetscFree(rwork);CHKERRQ(ierr);
2173 #endif
2174       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2175 #if defined(PETSC_MISSING_LAPACK_GESVD)
2176       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2177       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2178 #endif
2179     }
2180     for (k=0;k<nnsp_size;k++) {
2181       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2182     }
2183     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2184     /* free index sets of faces, edges and vertices */
2185     for (i=0;i<n_ISForFaces;i++) {
2186       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2187     }
2188     if (n_ISForFaces) {
2189       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2190     }
2191     for (i=0;i<n_ISForEdges;i++) {
2192       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2193     }
2194     if (n_ISForEdges) {
2195       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2196     }
2197     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2198   } else {
2199     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2200     PetscInt        cum = 0;
2201 
2202     total_counts = 0;
2203     n_vertices = 0;
2204     if (sub_schurs->is_Ej_com) {
2205       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2206     }
2207     max_constraints = 0;
2208     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2209       total_counts += pcbddc->adaptive_constraints_n[i];
2210       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2211     }
2212     temp_indices = pcbddc->adaptive_constraints_ptrs;
2213     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2214     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2215 
2216 #if 0
2217     printf("Found %d totals\n",total_counts);
2218     for (i=0;i<total_counts;i++) {
2219       printf("const %d, start %d",i,temp_indices[i]);
2220       printf(" end %d:\n",temp_indices[i+1]);
2221       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2222         printf("  idxs %d",temp_indices_to_constraint[j]);
2223         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2224       }
2225     }
2226     for (i=0;i<n_vertices;i++) {
2227       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2228     }
2229     for (i=0;i<sub_schurs->n_subs;i++) {
2230       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2231     }
2232 #endif
2233 
2234     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2235     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2236     /* Change of basis */
2237     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2238     if (pcbddc->use_change_of_basis) {
2239       cum = n_vertices;
2240       for (i=0;i<sub_schurs->n_subs;i++) {
2241         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2242           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2243             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2244           }
2245         }
2246         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2247       }
2248     }
2249   }
2250 
2251   /* map temp_indices_to_constraint in boundary numbering */
2252   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2253   if (i != temp_indices[total_counts]) {
2254     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2255   }
2256 
2257   /* set quantities in pcbddc data structure and store previous primal size */
2258   /* n_vertices defines the number of subdomain corners in the primal space */
2259   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2260   olocal_primal_size = pcbddc->local_primal_size;
2261   pcbddc->local_primal_size = total_counts;
2262   pcbddc->n_vertices = n_vertices;
2263   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2264 
2265   /* Create constraint matrix */
2266   /* The constraint matrix is used to compute the l2g map of primal dofs */
2267   /* so we need to set it up properly either with or without change of basis */
2268   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2269   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2270   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2271   /* array to compute a local numbering of constraints : vertices first then constraints */
2272   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2273   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2274   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2275   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2276   /* auxiliary stuff for basis change */
2277   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2278   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2279 
2280   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2281   total_primal_vertices=0;
2282   for (i=0;i<pcbddc->local_primal_size;i++) {
2283     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2284     if (size_of_constraint == 1) {
2285       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2286       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2287       aux_primal_minloc[total_primal_vertices]=0;
2288       total_primal_vertices++;
2289     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2290       PetscInt min_loc,min_index;
2291       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2292       /* find first untouched local node */
2293       k = 0;
2294       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2295       min_index = global_indices[k];
2296       min_loc = k;
2297       /* search the minimum among global nodes already untouched on the cc */
2298       for (k=1;k<size_of_constraint;k++) {
2299         /* there can be more than one constraint on a single connected component */
2300         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2301           min_index = global_indices[k];
2302           min_loc = k;
2303         }
2304       }
2305       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2306       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2307       aux_primal_minloc[total_primal_vertices]=min_loc;
2308       total_primal_vertices++;
2309     }
2310   }
2311   /* determine if a QR strategy is needed for change of basis */
2312   qr_needed = PETSC_FALSE;
2313   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2314   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2315     if (PetscBTLookup(change_basis,i)) {
2316       if (!pcbddc->use_qr_single) {
2317         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2318         j = 0;
2319         for (k=0;k<size_of_constraint;k++) {
2320           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2321             j++;
2322           }
2323         }
2324         /* found more than one primal dof on the cc */
2325         if (j > 1) {
2326           PetscBTSet(qr_needed_idx,i);
2327           qr_needed = PETSC_TRUE;
2328         }
2329       } else {
2330         PetscBTSet(qr_needed_idx,i);
2331         qr_needed = PETSC_TRUE;
2332       }
2333     }
2334   }
2335   /* free workspace */
2336   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2337 
2338   /* permute indices in order to have a sorted set of vertices */
2339   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2340 
2341   /* nonzero structure of constraint matrix */
2342   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2343   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2344   j=total_primal_vertices;
2345   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2346     if (!PetscBTLookup(change_basis,i)) {
2347       nnz[j]=temp_indices[i+1]-temp_indices[i];
2348       j++;
2349     }
2350   }
2351   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2352   ierr = PetscFree(nnz);CHKERRQ(ierr);
2353   /* set values in constraint matrix */
2354   for (i=0;i<total_primal_vertices;i++) {
2355     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2356   }
2357   total_counts = total_primal_vertices;
2358   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2359     if (!PetscBTLookup(change_basis,i)) {
2360       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2361       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2362       total_counts++;
2363     }
2364   }
2365   /* assembling */
2366   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2367   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   /*
2369   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2370   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2371   */
2372   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2373   if (pcbddc->use_change_of_basis) {
2374     /* dual and primal dofs on a single cc */
2375     PetscInt     dual_dofs,primal_dofs;
2376     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2377     PetscInt     primal_counter;
2378     /* working stuff for GEQRF */
2379     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2380     PetscBLASInt lqr_work;
2381     /* working stuff for UNGQR */
2382     PetscScalar  *gqr_work,lgqr_work_t;
2383     PetscBLASInt lgqr_work;
2384     /* working stuff for TRTRS */
2385     PetscScalar  *trs_rhs;
2386     PetscBLASInt Blas_NRHS;
2387     /* pointers for values insertion into change of basis matrix */
2388     PetscInt     *start_rows,*start_cols;
2389     PetscScalar  *start_vals;
2390     /* working stuff for values insertion */
2391     PetscBT      is_primal;
2392     /* matrix sizes */
2393     PetscInt     global_size,local_size;
2394     /* temporary change of basis */
2395     Mat          localChangeOfBasisMatrix;
2396     /* extra space for debugging */
2397     PetscScalar  *dbg_work;
2398 
2399     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2400     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2401     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2402     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2403     /* nonzeros for local mat */
2404     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2405     for (i=0;i<pcis->n;i++) nnz[i]=1;
2406     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2407       if (PetscBTLookup(change_basis,i)) {
2408         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2409         if (PetscBTLookup(qr_needed_idx,i)) {
2410           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2411         } else {
2412           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2413           /* get local primal index on the cc */
2414           j = 0;
2415           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2416           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2417         }
2418       }
2419     }
2420     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2421     ierr = PetscFree(nnz);CHKERRQ(ierr);
2422     /* Set initial identity in the matrix */
2423     for (i=0;i<pcis->n;i++) {
2424       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2425     }
2426 
2427     if (pcbddc->dbg_flag) {
2428       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2429       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2430     }
2431 
2432 
2433     /* Now we loop on the constraints which need a change of basis */
2434     /*
2435        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2436        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2437 
2438        Basic blocks of change of basis matrix T computed by
2439 
2440           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2441 
2442             | 1        0   ...        0         s_1/S |
2443             | 0        1   ...        0         s_2/S |
2444             |              ...                        |
2445             | 0        ...            1     s_{n-1}/S |
2446             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2447 
2448             with S = \sum_{i=1}^n s_i^2
2449             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2450                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2451 
2452           - QR decomposition of constraints otherwise
2453     */
2454     if (qr_needed) {
2455       /* space to store Q */
2456       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2457       /* first we issue queries for optimal work */
2458       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2459       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2460       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2461       lqr_work = -1;
2462       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2463       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2464       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2465       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2466       lgqr_work = -1;
2467       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2468       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2469       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2470       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2471       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2472       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2473       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2474       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2475       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2476       /* array to store scaling factors for reflectors */
2477       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2478       /* array to store rhs and solution of triangular solver */
2479       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2480       /* allocating workspace for check */
2481       if (pcbddc->dbg_flag) {
2482         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2483       }
2484     }
2485     /* array to store whether a node is primal or not */
2486     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2487     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2488     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2489     if (i != total_primal_vertices) {
2490       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2491     }
2492     for (i=0;i<total_primal_vertices;i++) {
2493       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2494     }
2495     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2496 
2497     /* loop on constraints and see whether or not they need a change of basis and compute it */
2498     /* -> using implicit ordering contained in temp_indices data */
2499     total_counts = pcbddc->n_vertices;
2500     primal_counter = total_counts;
2501     while (total_counts<pcbddc->local_primal_size) {
2502       primal_dofs = 1;
2503       if (PetscBTLookup(change_basis,total_counts)) {
2504         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2505         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2506           primal_dofs++;
2507         }
2508         /* get constraint info */
2509         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2510         dual_dofs = size_of_constraint-primal_dofs;
2511 
2512         if (pcbddc->dbg_flag) {
2513           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2514         }
2515 
2516         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2517 
2518           /* copy quadrature constraints for change of basis check */
2519           if (pcbddc->dbg_flag) {
2520             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2521           }
2522           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2523           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2524 
2525           /* compute QR decomposition of constraints */
2526           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2528           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2529           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2530           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2531           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2532           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2533 
2534           /* explictly compute R^-T */
2535           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2536           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2538           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2539           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2540           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2541           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2542           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2543           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2544           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2545 
2546           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2547           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2548           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2549           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2550           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2551           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2552           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2553           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2554           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2555 
2556           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2557              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2558              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2559           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2560           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2561           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2562           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2563           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2564           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2565           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2566           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2567           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2568           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2569 
2570           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2571           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2572           /* insert cols for primal dofs */
2573           for (j=0;j<primal_dofs;j++) {
2574             start_vals = &qr_basis[j*size_of_constraint];
2575             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2576             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2577           }
2578           /* insert cols for dual dofs */
2579           for (j=0,k=0;j<dual_dofs;k++) {
2580             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2581               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2582               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2583               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2584               j++;
2585             }
2586           }
2587 
2588           /* check change of basis */
2589           if (pcbddc->dbg_flag) {
2590             PetscInt   ii,jj;
2591             PetscBool valid_qr=PETSC_TRUE;
2592             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2593             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2594             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2595             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2596             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2597             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2598             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2599             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2600             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2601             for (jj=0;jj<size_of_constraint;jj++) {
2602               for (ii=0;ii<primal_dofs;ii++) {
2603                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2604                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2605               }
2606             }
2607             if (!valid_qr) {
2608               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2609               for (jj=0;jj<size_of_constraint;jj++) {
2610                 for (ii=0;ii<primal_dofs;ii++) {
2611                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2612                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2613                   }
2614                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2615                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2616                   }
2617                 }
2618               }
2619             } else {
2620               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2621             }
2622           }
2623         } else { /* simple transformation block */
2624           PetscInt    row,col;
2625           PetscScalar val,norm;
2626 
2627           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2628           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2629           for (j=0;j<size_of_constraint;j++) {
2630             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2631             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2632             if (!PetscBTLookup(is_primal,row_B)) {
2633               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2634               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2635               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2636             } else {
2637               for (k=0;k<size_of_constraint;k++) {
2638                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2639                 if (row != col) {
2640                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2641                 } else {
2642                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2643                 }
2644                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2645               }
2646             }
2647           }
2648           if (pcbddc->dbg_flag) {
2649             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2650           }
2651         }
2652         /* increment primal counter */
2653         primal_counter += primal_dofs;
2654       } else {
2655         if (pcbddc->dbg_flag) {
2656           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2657         }
2658       }
2659       /* increment constraint counter total_counts */
2660       total_counts += primal_dofs;
2661     }
2662 
2663     /* free workspace */
2664     if (qr_needed) {
2665       if (pcbddc->dbg_flag) {
2666         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2667       }
2668       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2669       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2670       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2671       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2672       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2673     }
2674     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2675     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2676     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2677 
2678     /* assembling of global change of variable */
2679     {
2680       Mat      tmat;
2681       PetscInt bs;
2682 
2683       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2684       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2685       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2686       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2687       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2688       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2689       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2690       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2691       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2692       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2693       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2694       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2695       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2696       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2697       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2698       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2699       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2700       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2701     }
2702     /* check */
2703     if (pcbddc->dbg_flag) {
2704       PetscReal error;
2705       Vec       x,x_change;
2706 
2707       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2708       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2709       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2710       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2711       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2713       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2714       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2715       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2716       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2717       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2718       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2719       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2720       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2721       ierr = VecDestroy(&x);CHKERRQ(ierr);
2722       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2723     }
2724 
2725     /* adapt sub_schurs computed (if any) */
2726     if (pcbddc->use_deluxe_scaling) {
2727       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2728       if (sub_schurs->n_subs_par_g) {
2729         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2730       }
2731       if (sub_schurs->S_Ej_all) {
2732         Mat S_1,S_2,tmat;
2733         IS is_all_N;
2734 
2735         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2736         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2737         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2738         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2739         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2740         sub_schurs->S_Ej_all = S_1;
2741         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2742         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2743         sub_schurs->sum_S_Ej_all = S_2;
2744         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2745       }
2746     }
2747     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2748   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2749     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2750     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2751   }
2752 
2753   /* set up change of basis context */
2754   if (pcbddc->ChangeOfBasisMatrix) {
2755     PCBDDCChange_ctx change_ctx;
2756 
2757     if (!pcbddc->new_global_mat) {
2758       PetscInt global_size,local_size;
2759 
2760       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2761       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2762       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2763       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2764       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2765       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2766       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2767       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2768       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2769     } else {
2770       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2771       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2772       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2773     }
2774     if (!pcbddc->user_ChangeOfBasisMatrix) {
2775       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2776       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2777     } else {
2778       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2779       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2780     }
2781     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2782     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2783     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2784     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785   }
2786 
2787   /* get indices in local ordering for vertices and constraints */
2788   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2789     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2790     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2791   }
2792   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2793   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2794   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2795   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2796   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2797   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2798   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2799   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2800   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2801   /* set quantities in PCBDDC data struct */
2802   pcbddc->n_actual_vertices = i;
2803   /* check if a new primal space has been introduced */
2804   pcbddc->new_primal_space_local = PETSC_TRUE;
2805   if (olocal_primal_size == pcbddc->local_primal_size) {
2806     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2807     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2808     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2809   }
2810   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2811   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2812 
2813   /* flush dbg viewer */
2814   if (pcbddc->dbg_flag) {
2815     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2816   }
2817 
2818   /* free workspace */
2819   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2820   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2821   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2822   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2823   if (!pcbddc->adaptive_selection) {
2824     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2825     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2826   } else {
2827     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2828                       pcbddc->adaptive_constraints_ptrs,
2829                       pcbddc->adaptive_constraints_idxs,
2830                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2831     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2832   }
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #undef __FUNCT__
2837 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2838 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2839 {
2840   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2841   PC_IS       *pcis = (PC_IS*)pc->data;
2842   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2843   PetscInt    ierr,i,vertex_size;
2844   PetscViewer viewer=pcbddc->dbg_viewer;
2845 
2846   PetscFunctionBegin;
2847   /* Reset previously computed graph */
2848   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2849   /* Init local Graph struct */
2850   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2851 
2852   /* Check validity of the csr graph passed in by the user */
2853   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2854     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2855   }
2856 
2857   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2858   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2859     PetscInt *xadj,*adjncy;
2860     PetscInt nvtxs;
2861 
2862     if (pcbddc->use_local_adj) {
2863       PetscBool flg_row=PETSC_FALSE;
2864 
2865       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2866       if (flg_row) {
2867         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2868         pcbddc->computed_rowadj = PETSC_TRUE;
2869       }
2870       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2871     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2872       IS                     is_dummy;
2873       ISLocalToGlobalMapping l2gmap_dummy;
2874       PetscInt               j,sum;
2875       PetscInt               *cxadj,*cadjncy;
2876       const PetscInt         *idxs;
2877       PCBDDCGraph            graph;
2878       PetscBT                is_on_boundary;
2879 
2880       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2881       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2882       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2883       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2884       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2885       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2886       graph->xadj = xadj;
2887       graph->adjncy = adjncy;
2888       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2889       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2890 
2891       if (pcbddc->dbg_flag) {
2892         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2893         for (i=0;i<graph->ncc;i++) {
2894           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2895         }
2896         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2897       }
2898 
2899       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2900       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2901       for (i=0;i<pcis->n_B;i++) {
2902         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2903       }
2904       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2905 
2906       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2907       sum = 0;
2908       for (i=0;i<graph->ncc;i++) {
2909         PetscInt sizecc = 0;
2910         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2911           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2912             sizecc++;
2913           }
2914         }
2915         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2916           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2917             cxadj[graph->queue[j]] = sizecc;
2918           }
2919         }
2920         sum += sizecc*sizecc;
2921       }
2922       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2923       sum = 0;
2924       for (i=0;i<nvtxs;i++) {
2925         PetscInt temp = cxadj[i];
2926         cxadj[i] = sum;
2927         sum += temp;
2928       }
2929       cxadj[nvtxs] = sum;
2930       for (i=0;i<graph->ncc;i++) {
2931         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2932           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2933             PetscInt k,sizecc = 0;
2934             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2935               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2936                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2937                 sizecc++;
2938               }
2939             }
2940           }
2941         }
2942       }
2943       if (nvtxs) {
2944         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2945       } else {
2946         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2947         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2948       }
2949       graph->xadj = 0;
2950       graph->adjncy = 0;
2951       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2952       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2953     }
2954   }
2955 
2956   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2957   vertex_size = 1;
2958   if (pcbddc->user_provided_isfordofs) {
2959     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2960       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2961       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2962         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2963         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2964       }
2965       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2966       pcbddc->n_ISForDofs = 0;
2967       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2968     }
2969     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2970     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2971   } else {
2972     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2973       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2974       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2975       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2976         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2977       }
2978     }
2979   }
2980 
2981   /* Setup of Graph */
2982   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2983     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2984   }
2985   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2986     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2987   }
2988   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2989 
2990   /* Graph's connected components analysis */
2991   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2992 
2993   /* print some info to stdout */
2994   if (pcbddc->dbg_flag) {
2995     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2996   }
2997 
2998   /* mark topography has done */
2999   pcbddc->recompute_topography = PETSC_FALSE;
3000   PetscFunctionReturn(0);
3001 }
3002 
3003 #undef __FUNCT__
3004 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3005 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3006 {
3007   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3008   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   n = 0;
3013   vertices = 0;
3014   if (pcbddc->ConstraintMatrix) {
3015     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3016     for (i=0;i<local_primal_size;i++) {
3017       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3018       if (size_of_constraint == 1) n++;
3019       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3020     }
3021     if (vertices_idx) {
3022       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3023       n = 0;
3024       for (i=0;i<local_primal_size;i++) {
3025         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3026         if (size_of_constraint == 1) {
3027           vertices[n++]=row_cmat_indices[0];
3028         }
3029         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3030       }
3031     }
3032   }
3033   *n_vertices = n;
3034   if (vertices_idx) *vertices_idx = vertices;
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 #undef __FUNCT__
3039 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3040 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3041 {
3042   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3043   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3044   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3045   PetscBT        touched;
3046   PetscErrorCode ierr;
3047 
3048     /* This function assumes that the number of local constraints per connected component
3049        is not greater than the number of nodes defined for the connected component
3050        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3051   PetscFunctionBegin;
3052   n = 0;
3053   constraints_index = 0;
3054   if (pcbddc->ConstraintMatrix) {
3055     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3056     max_size_of_constraint = 0;
3057     for (i=0;i<local_primal_size;i++) {
3058       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3059       if (size_of_constraint > 1) {
3060         n++;
3061       }
3062       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3063       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3064     }
3065     if (constraints_idx) {
3066       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3067       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3068       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3069       n = 0;
3070       for (i=0;i<local_primal_size;i++) {
3071         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3072         if (size_of_constraint > 1) {
3073           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3074           /* find first untouched local node */
3075           j = 0;
3076           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3077           min_index = row_cmat_global_indices[j];
3078           min_loc = j;
3079           /* search the minimum among nodes not yet touched on the connected component
3080              since there can be more than one constraint on a single cc */
3081           for (j=1;j<size_of_constraint;j++) {
3082             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3083               min_index = row_cmat_global_indices[j];
3084               min_loc = j;
3085             }
3086           }
3087           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3088           constraints_index[n++] = row_cmat_indices[min_loc];
3089         }
3090         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3091       }
3092       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3093       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3094     }
3095   }
3096   *n_constraints = n;
3097   if (constraints_idx) *constraints_idx = constraints_index;
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 #undef __FUNCT__
3102 #define __FUNCT__ "PCBDDCSubsetNumbering"
3103 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3104 {
3105   Vec            local_vec,global_vec;
3106   IS             seqis,paris;
3107   VecScatter     scatter_ctx;
3108   PetscScalar    *array;
3109   PetscInt       *temp_global_dofs;
3110   PetscScalar    globalsum;
3111   PetscInt       i,j,s;
3112   PetscInt       nlocals,first_index,old_index,max_local;
3113   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3114   PetscMPIInt    *dof_sizes,*dof_displs;
3115   PetscBool      first_found;
3116   PetscErrorCode ierr;
3117 
3118   PetscFunctionBegin;
3119   /* mpi buffers */
3120   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3121   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3122   j = ( !rank_prec_comm ? size_prec_comm : 0);
3123   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3124   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3125   /* get maximum size of subset */
3126   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3127   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3128   max_local = 0;
3129   for (i=0;i<n_local_dofs;i++) {
3130     if (max_local < temp_global_dofs[i] ) {
3131       max_local = temp_global_dofs[i];
3132     }
3133   }
3134   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3135   max_global++;
3136   max_local = 0;
3137   for (i=0;i<n_local_dofs;i++) {
3138     if (max_local < local_dofs[i] ) {
3139       max_local = local_dofs[i];
3140     }
3141   }
3142   max_local++;
3143   /* allocate workspace */
3144   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3145   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3146   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3147   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3148   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3149   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3150   /* create scatter */
3151   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3152   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3153   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3154   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3155   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3156   /* init array */
3157   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3158   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3159   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3160   if (local_dofs_mult) {
3161     for (i=0;i<n_local_dofs;i++) {
3162       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3163     }
3164   } else {
3165     for (i=0;i<n_local_dofs;i++) {
3166       array[local_dofs[i]]=1.0;
3167     }
3168   }
3169   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3170   /* scatter into global vec and get total number of global dofs */
3171   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3172   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3174   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3175   /* Fill global_vec with cumulative function for global numbering */
3176   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3177   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3178   nlocals = 0;
3179   first_index = -1;
3180   first_found = PETSC_FALSE;
3181   for (i=0;i<s;i++) {
3182     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3183       first_found = PETSC_TRUE;
3184       first_index = i;
3185     }
3186     nlocals += (PetscInt)PetscRealPart(array[i]);
3187   }
3188   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3189   if (!rank_prec_comm) {
3190     dof_displs[0]=0;
3191     for (i=1;i<size_prec_comm;i++) {
3192       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3193     }
3194   }
3195   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3196   if (first_found) {
3197     array[first_index] += (PetscScalar)nlocals;
3198     old_index = first_index;
3199     for (i=first_index+1;i<s;i++) {
3200       if (PetscRealPart(array[i]) > 0.1) {
3201         array[i] += array[old_index];
3202         old_index = i;
3203       }
3204     }
3205   }
3206   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3207   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3208   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3209   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3210   /* get global ordering of local dofs */
3211   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3212   if (local_dofs_mult) {
3213     for (i=0;i<n_local_dofs;i++) {
3214       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3215     }
3216   } else {
3217     for (i=0;i<n_local_dofs;i++) {
3218       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3219     }
3220   }
3221   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3222   /* free workspace */
3223   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3224   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3225   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3226   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3227   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3228   /* return pointer to global ordering of local dofs */
3229   *global_numbering_subset = temp_global_dofs;
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3235 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3236 {
3237   PetscInt       i,j;
3238   PetscScalar    *alphas;
3239   PetscErrorCode ierr;
3240 
3241   PetscFunctionBegin;
3242   /* this implements stabilized Gram-Schmidt */
3243   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3244   for (i=0;i<n;i++) {
3245     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3246     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3247     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3248   }
3249   ierr = PetscFree(alphas);CHKERRQ(ierr);
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatISGetSubassemblingPattern"
3255 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3256 {
3257   Mat             subdomain_adj;
3258   IS              new_ranks,ranks_send_to;
3259   MatPartitioning partitioner;
3260   Mat_IS          *matis;
3261   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3262   PetscInt        prank;
3263   PetscMPIInt     size,rank,color;
3264   PetscInt        *xadj,*adjncy,*oldranks;
3265   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3266   PetscInt        i,local_size,threshold=0;
3267   PetscErrorCode  ierr;
3268   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3269   PetscSubcomm    subcomm;
3270 
3271   PetscFunctionBegin;
3272   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3273   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3274   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3275 
3276   /* Get info on mapping */
3277   matis = (Mat_IS*)(mat->data);
3278   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3279   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3280 
3281   /* build local CSR graph of subdomains' connectivity */
3282   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3283   xadj[0] = 0;
3284   xadj[1] = PetscMax(n_neighs-1,0);
3285   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3286   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3287 
3288   if (threshold) {
3289     PetscInt xadj_count = 0;
3290     for (i=1;i<n_neighs;i++) {
3291       if (n_shared[i] > threshold) {
3292         adjncy[xadj_count] = neighs[i];
3293         adjncy_wgt[xadj_count] = n_shared[i];
3294         xadj_count++;
3295       }
3296     }
3297     xadj[1] = xadj_count;
3298   } else {
3299     if (xadj[1]) {
3300       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3301       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3302     }
3303   }
3304   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3305   if (use_square) {
3306     for (i=0;i<xadj[1];i++) {
3307       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3308     }
3309   }
3310   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3311 
3312   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3313 
3314   /*
3315     Restrict work on active processes only.
3316   */
3317   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3318   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3319   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3320   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3321   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3322   if (color) {
3323     ierr = PetscFree(xadj);CHKERRQ(ierr);
3324     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3325     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3326   } else {
3327     PetscInt coarsening_ratio;
3328     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3329     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3330     prank = rank;
3331     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3332     /*
3333     for (i=0;i<size;i++) {
3334       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3335     }
3336     */
3337     for (i=0;i<xadj[1];i++) {
3338       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3339     }
3340     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3341     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3342     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3343 
3344     /* Partition */
3345     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3346     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3347     if (use_vwgt) {
3348       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3349       v_wgt[0] = local_size;
3350       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3351     }
3352     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3353     coarsening_ratio = size/n_subdomains;
3354     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3355     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3356     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3357     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3358 
3359     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3360     if (contiguous) {
3361       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3362     } else {
3363       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3364     }
3365     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3366     /* clean up */
3367     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3368     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3369     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3370     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3371   }
3372   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3373 
3374   /* assemble parallel IS for sends */
3375   i = 1;
3376   if (color) i=0;
3377   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3378 
3379   /* get back IS */
3380   *is_sends = ranks_send_to;
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3385 
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatISSubassemble"
3388 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3389 {
3390   Mat                    local_mat;
3391   Mat_IS                 *matis;
3392   IS                     is_sends_internal;
3393   PetscInt               rows,cols,new_local_rows;
3394   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3395   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3396   ISLocalToGlobalMapping l2gmap;
3397   PetscInt*              l2gmap_indices;
3398   const PetscInt*        is_indices;
3399   MatType                new_local_type;
3400   /* buffers */
3401   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3402   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3403   PetscInt               *recv_buffer_idxs_local;
3404   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3405   /* MPI */
3406   MPI_Comm               comm,comm_n;
3407   PetscSubcomm           subcomm;
3408   PetscMPIInt            n_sends,n_recvs,commsize;
3409   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3410   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3411   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3412   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3413   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3414   PetscErrorCode         ierr;
3415 
3416   PetscFunctionBegin;
3417   /* TODO: add missing checks */
3418   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3419   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3420   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3421   PetscValidLogicalCollectiveInt(mat,nis,7);
3422   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3423   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3424   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3425   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3426   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3427   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3428   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3429   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3430     PetscInt mrows,mcols,mnrows,mncols;
3431     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3432     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3433     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3434     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3435     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3436     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3437   }
3438   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3439   PetscValidLogicalCollectiveInt(mat,bs,0);
3440   /* prepare IS for sending if not provided */
3441   if (!is_sends) {
3442     PetscBool pcontig = PETSC_TRUE;
3443     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3444     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3445   } else {
3446     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3447     is_sends_internal = is_sends;
3448   }
3449 
3450   /* get pointer of MATIS data */
3451   matis = (Mat_IS*)mat->data;
3452 
3453   /* get comm */
3454   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3455 
3456   /* compute number of sends */
3457   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3458   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3459 
3460   /* compute number of receives */
3461   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3462   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3463   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3464   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3465   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3466   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3467   ierr = PetscFree(iflags);CHKERRQ(ierr);
3468 
3469   /* restrict comm if requested */
3470   subcomm = 0;
3471   destroy_mat = PETSC_FALSE;
3472   if (restrict_comm) {
3473     PetscMPIInt color,subcommsize;
3474 
3475     color = 0;
3476     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3477     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3478     subcommsize = commsize - subcommsize;
3479     /* check if reuse has been requested */
3480     if (reuse == MAT_REUSE_MATRIX) {
3481       if (*mat_n) {
3482         PetscMPIInt subcommsize2;
3483         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3484         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3485         comm_n = PetscObjectComm((PetscObject)*mat_n);
3486       } else {
3487         comm_n = PETSC_COMM_SELF;
3488       }
3489     } else { /* MAT_INITIAL_MATRIX */
3490       PetscMPIInt rank;
3491 
3492       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3493       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3494       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3495       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3496       comm_n = PetscSubcommChild(subcomm);
3497     }
3498     /* flag to destroy *mat_n if not significative */
3499     if (color) destroy_mat = PETSC_TRUE;
3500   } else {
3501     comm_n = comm;
3502   }
3503 
3504   /* prepare send/receive buffers */
3505   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3506   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3507   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3508   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3509   if (nis) {
3510     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3511   }
3512 
3513   /* Get data from local matrices */
3514   if (!isdense) {
3515     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3516     /* TODO: See below some guidelines on how to prepare the local buffers */
3517     /*
3518        send_buffer_vals should contain the raw values of the local matrix
3519        send_buffer_idxs should contain:
3520        - MatType_PRIVATE type
3521        - PetscInt        size_of_l2gmap
3522        - PetscInt        global_row_indices[size_of_l2gmap]
3523        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3524     */
3525   } else {
3526     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3527     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3528     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3529     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3530     send_buffer_idxs[1] = i;
3531     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3532     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3533     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3534     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3535     for (i=0;i<n_sends;i++) {
3536       ilengths_vals[is_indices[i]] = len*len;
3537       ilengths_idxs[is_indices[i]] = len+2;
3538     }
3539   }
3540   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3541   /* additional is (if any) */
3542   if (nis) {
3543     PetscMPIInt psum;
3544     PetscInt j;
3545     for (j=0,psum=0;j<nis;j++) {
3546       PetscInt plen;
3547       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3548       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3549       psum += len+1; /* indices + lenght */
3550     }
3551     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3552     for (j=0,psum=0;j<nis;j++) {
3553       PetscInt plen;
3554       const PetscInt *is_array_idxs;
3555       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3556       send_buffer_idxs_is[psum] = plen;
3557       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3558       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3559       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3560       psum += plen+1; /* indices + lenght */
3561     }
3562     for (i=0;i<n_sends;i++) {
3563       ilengths_idxs_is[is_indices[i]] = psum;
3564     }
3565     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3566   }
3567 
3568   buf_size_idxs = 0;
3569   buf_size_vals = 0;
3570   buf_size_idxs_is = 0;
3571   for (i=0;i<n_recvs;i++) {
3572     buf_size_idxs += (PetscInt)olengths_idxs[i];
3573     buf_size_vals += (PetscInt)olengths_vals[i];
3574     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3575   }
3576   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3577   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3578   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3579 
3580   /* get new tags for clean communications */
3581   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3582   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3583   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3584 
3585   /* allocate for requests */
3586   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3587   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3588   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3589   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3590   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3591   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3592 
3593   /* communications */
3594   ptr_idxs = recv_buffer_idxs;
3595   ptr_vals = recv_buffer_vals;
3596   ptr_idxs_is = recv_buffer_idxs_is;
3597   for (i=0;i<n_recvs;i++) {
3598     source_dest = onodes[i];
3599     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3600     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3601     ptr_idxs += olengths_idxs[i];
3602     ptr_vals += olengths_vals[i];
3603     if (nis) {
3604       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3605       ptr_idxs_is += olengths_idxs_is[i];
3606     }
3607   }
3608   for (i=0;i<n_sends;i++) {
3609     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3610     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3611     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3612     if (nis) {
3613       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3614     }
3615   }
3616   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3617   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3618 
3619   /* assemble new l2g map */
3620   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3621   ptr_idxs = recv_buffer_idxs;
3622   new_local_rows = 0;
3623   for (i=0;i<n_recvs;i++) {
3624     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3625     ptr_idxs += olengths_idxs[i];
3626   }
3627   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3628   ptr_idxs = recv_buffer_idxs;
3629   new_local_rows = 0;
3630   for (i=0;i<n_recvs;i++) {
3631     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3632     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3633     ptr_idxs += olengths_idxs[i];
3634   }
3635   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3636   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3637   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3638 
3639   /* infer new local matrix type from received local matrices type */
3640   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3641   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3642   if (n_recvs) {
3643     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3644     ptr_idxs = recv_buffer_idxs;
3645     for (i=0;i<n_recvs;i++) {
3646       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3647         new_local_type_private = MATAIJ_PRIVATE;
3648         break;
3649       }
3650       ptr_idxs += olengths_idxs[i];
3651     }
3652     switch (new_local_type_private) {
3653       case MATDENSE_PRIVATE:
3654         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3655           new_local_type = MATSEQAIJ;
3656           bs = 1;
3657         } else { /* if I receive only 1 dense matrix */
3658           new_local_type = MATSEQDENSE;
3659           bs = 1;
3660         }
3661         break;
3662       case MATAIJ_PRIVATE:
3663         new_local_type = MATSEQAIJ;
3664         bs = 1;
3665         break;
3666       case MATBAIJ_PRIVATE:
3667         new_local_type = MATSEQBAIJ;
3668         break;
3669       case MATSBAIJ_PRIVATE:
3670         new_local_type = MATSEQSBAIJ;
3671         break;
3672       default:
3673         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3674         break;
3675     }
3676   } else { /* by default, new_local_type is seqdense */
3677     new_local_type = MATSEQDENSE;
3678     bs = 1;
3679   }
3680 
3681   /* create MATIS object if needed */
3682   if (reuse == MAT_INITIAL_MATRIX) {
3683     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3684     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3685   } else {
3686     /* it also destroys the local matrices */
3687     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3688   }
3689   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3690   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3691 
3692   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3693 
3694   /* Global to local map of received indices */
3695   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3696   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3697   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3698 
3699   /* restore attributes -> type of incoming data and its size */
3700   buf_size_idxs = 0;
3701   for (i=0;i<n_recvs;i++) {
3702     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3703     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3704     buf_size_idxs += (PetscInt)olengths_idxs[i];
3705   }
3706   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3707 
3708   /* set preallocation */
3709   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3710   if (!newisdense) {
3711     PetscInt *new_local_nnz=0;
3712 
3713     ptr_vals = recv_buffer_vals;
3714     ptr_idxs = recv_buffer_idxs_local;
3715     if (n_recvs) {
3716       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3717     }
3718     for (i=0;i<n_recvs;i++) {
3719       PetscInt j;
3720       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3721         for (j=0;j<*(ptr_idxs+1);j++) {
3722           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3723         }
3724       } else {
3725         /* TODO */
3726       }
3727       ptr_idxs += olengths_idxs[i];
3728     }
3729     if (new_local_nnz) {
3730       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3731       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3732       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3733       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3734       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3735       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3736     } else {
3737       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3738     }
3739     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3740   } else {
3741     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3742   }
3743 
3744   /* set values */
3745   ptr_vals = recv_buffer_vals;
3746   ptr_idxs = recv_buffer_idxs_local;
3747   for (i=0;i<n_recvs;i++) {
3748     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3749       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3750       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3751       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3752       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3753       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3754     } else {
3755       /* TODO */
3756     }
3757     ptr_idxs += olengths_idxs[i];
3758     ptr_vals += olengths_vals[i];
3759   }
3760   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3763   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3765   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3766 
3767 #if 0
3768   if (!restrict_comm) { /* check */
3769     Vec       lvec,rvec;
3770     PetscReal infty_error;
3771 
3772     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3773     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3774     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3775     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3776     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3777     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3778     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3779     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3780     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3781   }
3782 #endif
3783 
3784   /* assemble new additional is (if any) */
3785   if (nis) {
3786     PetscInt **temp_idxs,*count_is,j,psum;
3787 
3788     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3789     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3790     ptr_idxs = recv_buffer_idxs_is;
3791     psum = 0;
3792     for (i=0;i<n_recvs;i++) {
3793       for (j=0;j<nis;j++) {
3794         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3795         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3796         psum += plen;
3797         ptr_idxs += plen+1; /* shift pointer to received data */
3798       }
3799     }
3800     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3801     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3802     for (i=1;i<nis;i++) {
3803       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3804     }
3805     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3806     ptr_idxs = recv_buffer_idxs_is;
3807     for (i=0;i<n_recvs;i++) {
3808       for (j=0;j<nis;j++) {
3809         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3810         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3811         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3812         ptr_idxs += plen+1; /* shift pointer to received data */
3813       }
3814     }
3815     for (i=0;i<nis;i++) {
3816       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3817       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3818       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3819     }
3820     ierr = PetscFree(count_is);CHKERRQ(ierr);
3821     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3822     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3823   }
3824   /* free workspace */
3825   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3826   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3827   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3828   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3829   if (isdense) {
3830     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3831     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3832   } else {
3833     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3834   }
3835   if (nis) {
3836     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3837     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3838   }
3839   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3840   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3841   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3842   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3843   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3844   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3845   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3846   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3847   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3848   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3849   ierr = PetscFree(onodes);CHKERRQ(ierr);
3850   if (nis) {
3851     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3852     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3853     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3854   }
3855   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3856   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3857     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3858     for (i=0;i<nis;i++) {
3859       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3860     }
3861   }
3862   PetscFunctionReturn(0);
3863 }
3864 
3865 /* temporary hack into ksp private data structure */
3866 #include <petsc-private/kspimpl.h>
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3870 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3871 {
3872   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3873   PC_IS                  *pcis = (PC_IS*)pc->data;
3874   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3875   MatNullSpace           CoarseNullSpace=NULL;
3876   ISLocalToGlobalMapping coarse_islg;
3877   IS                     coarse_is,*isarray;
3878   PetscInt               i,im_active=-1,active_procs=-1;
3879   PetscInt               nis,nisdofs,nisneu;
3880   PC                     pc_temp;
3881   PCType                 coarse_pc_type;
3882   KSPType                coarse_ksp_type;
3883   PetscBool              multilevel_requested,multilevel_allowed;
3884   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3885   Mat                    t_coarse_mat_is;
3886   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3887   PetscMPIInt            all_procs;
3888   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3889   PetscBool              compute_vecs = PETSC_FALSE;
3890   PetscScalar            *array;
3891   PetscErrorCode         ierr;
3892 
3893   PetscFunctionBegin;
3894   /* Assign global numbering to coarse dofs */
3895   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3896     PetscInt ocoarse_size;
3897     compute_vecs = PETSC_TRUE;
3898     ocoarse_size = pcbddc->coarse_size;
3899     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3900     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3901     /* see if we can avoid some work */
3902     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3903       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3904         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3905         coarse_reuse = PETSC_FALSE;
3906       } else { /* we can safely reuse already computed coarse matrix */
3907         coarse_reuse = PETSC_TRUE;
3908       }
3909     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3910       coarse_reuse = PETSC_FALSE;
3911     }
3912     /* reset any subassembling information */
3913     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3914     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3915   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3916     coarse_reuse = PETSC_TRUE;
3917   }
3918 
3919   /* count "active" (i.e. with positive local size) and "void" processes */
3920   im_active = !!(pcis->n);
3921   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3922   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3923   void_procs = all_procs-active_procs;
3924   csin_type_simple = PETSC_TRUE;
3925   redist = PETSC_FALSE;
3926   if (pcbddc->current_level && void_procs) {
3927     csin_ml = PETSC_TRUE;
3928     ncoarse_ml = void_procs;
3929     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3930     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3931       csin_ds = PETSC_TRUE;
3932       ncoarse_ds = pcbddc->redistribute_coarse;
3933       redist = PETSC_TRUE;
3934     } else {
3935       csin_ds = PETSC_TRUE;
3936       ncoarse_ds = active_procs;
3937       redist = PETSC_TRUE;
3938     }
3939   } else {
3940     csin_ml = PETSC_FALSE;
3941     ncoarse_ml = all_procs;
3942     if (void_procs) {
3943       csin_ds = PETSC_TRUE;
3944       ncoarse_ds = void_procs;
3945       csin_type_simple = PETSC_FALSE;
3946     } else {
3947       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3948         csin_ds = PETSC_TRUE;
3949         ncoarse_ds = pcbddc->redistribute_coarse;
3950         redist = PETSC_TRUE;
3951       } else {
3952         csin_ds = PETSC_FALSE;
3953         ncoarse_ds = all_procs;
3954       }
3955     }
3956   }
3957 
3958   /*
3959     test if we can go multilevel: three conditions must be satisfied:
3960     - we have not exceeded the number of levels requested
3961     - we can actually subassemble the active processes
3962     - we can find a suitable number of MPI processes where we can place the subassembled problem
3963   */
3964   multilevel_allowed = PETSC_FALSE;
3965   multilevel_requested = PETSC_FALSE;
3966   if (pcbddc->current_level < pcbddc->max_levels) {
3967     multilevel_requested = PETSC_TRUE;
3968     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3969       multilevel_allowed = PETSC_FALSE;
3970     } else {
3971       multilevel_allowed = PETSC_TRUE;
3972     }
3973   }
3974   /* determine number of process partecipating to coarse solver */
3975   if (multilevel_allowed) {
3976     ncoarse = ncoarse_ml;
3977     csin = csin_ml;
3978     redist = PETSC_FALSE;
3979   } else {
3980     ncoarse = ncoarse_ds;
3981     csin = csin_ds;
3982   }
3983 
3984   /* creates temporary l2gmap and IS for coarse indexes */
3985   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3986   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3987 
3988   /* creates temporary MATIS object for coarse matrix */
3989   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3990   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3991   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3992   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3993 #if 0
3994   {
3995     PetscViewer viewer;
3996     char filename[256];
3997     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3998     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3999     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4000     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4001     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4002   }
4003 #endif
4004   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4005   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4006   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4007   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4008   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4009 
4010   /* compute dofs splitting and neumann boundaries for coarse dofs */
4011   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4012     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4013     const PetscInt         *idxs;
4014     ISLocalToGlobalMapping tmap;
4015 
4016     /* create map between primal indices (in local representative ordering) and local primal numbering */
4017     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4018     /* allocate space for temporary storage */
4019     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4020     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4021     /* allocate for IS array */
4022     nisdofs = pcbddc->n_ISForDofsLocal;
4023     nisneu = !!pcbddc->NeumannBoundariesLocal;
4024     nis = nisdofs + nisneu;
4025     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4026     /* dofs splitting */
4027     for (i=0;i<nisdofs;i++) {
4028       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4029       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4030       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4031       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4032       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4033       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4034       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4035       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4036     }
4037     /* neumann boundaries */
4038     if (pcbddc->NeumannBoundariesLocal) {
4039       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4040       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4041       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4042       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4043       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4044       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4045       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4046       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4047     }
4048     /* free memory */
4049     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4050     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4051     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4052   } else {
4053     nis = 0;
4054     nisdofs = 0;
4055     nisneu = 0;
4056     isarray = NULL;
4057   }
4058   /* destroy no longer needed map */
4059   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4060 
4061   /* restrict on coarse candidates (if needed) */
4062   coarse_mat_is = NULL;
4063   if (csin) {
4064     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4065       if (redist) {
4066         PetscMPIInt rank;
4067         PetscInt    spc,n_spc_p1,dest[1],destsize;
4068 
4069         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4070         spc = active_procs/ncoarse;
4071         n_spc_p1 = active_procs%ncoarse;
4072         if (im_active) {
4073           destsize = 1;
4074           if (rank > n_spc_p1*(spc+1)-1) {
4075             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4076           } else {
4077             dest[0] = rank/(spc+1);
4078           }
4079         } else {
4080           destsize = 0;
4081         }
4082         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4083       } else if (csin_type_simple) {
4084         PetscMPIInt rank;
4085         PetscInt    issize,isidx;
4086 
4087         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4088         if (im_active) {
4089           issize = 1;
4090           isidx = (PetscInt)rank;
4091         } else {
4092           issize = 0;
4093           isidx = -1;
4094         }
4095         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4096       } else { /* get a suitable subassembling pattern from MATIS code */
4097         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4098       }
4099 
4100       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4101       if (!redist || ncoarse <= void_procs) {
4102         PetscInt ncoarse_cand,tissize,*nisindices;
4103         PetscInt *coarse_candidates;
4104         const PetscInt* tisindices;
4105 
4106         /* get coarse candidates' ranks in pc communicator */
4107         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4108         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4109         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4110           if (!coarse_candidates[i]) {
4111             coarse_candidates[ncoarse_cand++]=i;
4112           }
4113         }
4114         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4115 
4116 
4117         if (pcbddc->dbg_flag) {
4118           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4119           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4120           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4121           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4122           for (i=0;i<ncoarse_cand;i++) {
4123             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4124           }
4125           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4126           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4127         }
4128         /* shift the pattern on coarse candidates */
4129         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4130         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4131         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4132         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4133         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4134         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4135         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4136       }
4137       if (pcbddc->dbg_flag) {
4138         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4139         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4140         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4141         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4142       }
4143     }
4144     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4145     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4146   } else {
4147     if (pcbddc->dbg_flag) {
4148       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4149       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4150       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4151     }
4152     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4153     coarse_mat_is = t_coarse_mat_is;
4154   }
4155 
4156   /* create local to global scatters for coarse problem */
4157   if (compute_vecs) {
4158     PetscInt lrows;
4159     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4160     if (coarse_mat_is) {
4161       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4162     } else {
4163       lrows = 0;
4164     }
4165     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4166     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4167     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4168     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4169     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4170   }
4171   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4172   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4173 
4174   /* set defaults for coarse KSP and PC */
4175   if (multilevel_allowed) {
4176     coarse_ksp_type = KSPRICHARDSON;
4177     coarse_pc_type = PCBDDC;
4178   } else {
4179     coarse_ksp_type = KSPPREONLY;
4180     coarse_pc_type = PCREDUNDANT;
4181   }
4182 
4183   /* print some info if requested */
4184   if (pcbddc->dbg_flag) {
4185     if (!multilevel_allowed) {
4186       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4187       if (multilevel_requested) {
4188         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4189       } else if (pcbddc->max_levels) {
4190         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4191       }
4192       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4193     }
4194   }
4195 
4196   /* create the coarse KSP object only once with defaults */
4197   if (coarse_mat_is) {
4198     MatReuse coarse_mat_reuse;
4199     PetscViewer dbg_viewer = NULL;
4200     if (pcbddc->dbg_flag) {
4201       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4202       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4203     }
4204     if (!pcbddc->coarse_ksp) {
4205       char prefix[256],str_level[16];
4206       size_t len;
4207       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4208       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4209       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4210       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4211       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4212       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4213       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4214       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4215       /* prefix */
4216       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4217       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4218       if (!pcbddc->current_level) {
4219         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4220         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4221       } else {
4222         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4223         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4224         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4225         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4226         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4227         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4228       }
4229       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4230       /* allow user customization */
4231       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4232       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4233     }
4234 
4235     /* get some info after set from options */
4236     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4237     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4238     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4239     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4240     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4241       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4242       isbddc = PETSC_FALSE;
4243     }
4244     if (isredundant) {
4245       KSP inner_ksp;
4246       PC inner_pc;
4247       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4248       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4249       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4250     }
4251 
4252     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4253     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4254     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4255     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4256     if (nisdofs) {
4257       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4258       for (i=0;i<nisdofs;i++) {
4259         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4260       }
4261     }
4262     if (nisneu) {
4263       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4264       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4265     }
4266 
4267     /* assemble coarse matrix */
4268     if (coarse_reuse) {
4269       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4270       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4271       coarse_mat_reuse = MAT_REUSE_MATRIX;
4272     } else {
4273       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4274     }
4275     if (isbddc || isnn) {
4276       if (pcbddc->coarsening_ratio > 1) {
4277         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4278           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4279           if (pcbddc->dbg_flag) {
4280             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4281             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4282             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4283             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4284           }
4285         }
4286         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4287       } else {
4288         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4289         coarse_mat = coarse_mat_is;
4290       }
4291     } else {
4292       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4293     }
4294     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4295 
4296     /* propagate symmetry info to coarse matrix */
4297     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4298     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4299 
4300     /* set operators */
4301     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4302     if (pcbddc->dbg_flag) {
4303       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4304     }
4305   } else { /* processes non partecipating to coarse solver (if any) */
4306     coarse_mat = 0;
4307   }
4308   ierr = PetscFree(isarray);CHKERRQ(ierr);
4309 #if 0
4310   {
4311     PetscViewer viewer;
4312     char filename[256];
4313     sprintf(filename,"coarse_mat.m");
4314     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4315     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4316     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4317     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4318   }
4319 #endif
4320 
4321   /* Compute coarse null space (special handling by BDDC only) */
4322   if (pcbddc->NullSpace) {
4323     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4324   }
4325 
4326   if (pcbddc->coarse_ksp) {
4327     Vec crhs,csol;
4328     PetscBool ispreonly;
4329     if (CoarseNullSpace) {
4330       if (isbddc) {
4331         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4332       } else {
4333         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4334       }
4335     }
4336     /* setup coarse ksp */
4337     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4338     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4339     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4340     /* hack */
4341     if (!csol) {
4342       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4343     }
4344     if (!crhs) {
4345       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4346     }
4347     /* Check coarse problem if in debug mode or if solving with an iterative method */
4348     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4349     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4350       KSP       check_ksp;
4351       KSPType   check_ksp_type;
4352       PC        check_pc;
4353       Vec       check_vec,coarse_vec;
4354       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4355       PetscInt  its;
4356       PetscBool compute_eigs;
4357       PetscReal *eigs_r,*eigs_c;
4358       PetscInt  neigs;
4359       const char *prefix;
4360 
4361       /* Create ksp object suitable for estimation of extreme eigenvalues */
4362       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4363       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4364       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4365       if (ispreonly) {
4366         check_ksp_type = KSPPREONLY;
4367         compute_eigs = PETSC_FALSE;
4368       } else {
4369         check_ksp_type = KSPGMRES;
4370         compute_eigs = PETSC_TRUE;
4371       }
4372       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4373       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4374       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4375       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4376       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4377       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4378       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4379       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4380       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4381       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4382       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4383       /* create random vec */
4384       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4385       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4386       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4387       if (CoarseNullSpace) {
4388         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4389       }
4390       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4391       /* solve coarse problem */
4392       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4393       if (CoarseNullSpace) {
4394         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4395       }
4396       /* set eigenvalue estimation if preonly has not been requested */
4397       if (compute_eigs) {
4398         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4399         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4400         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4401         lambda_max = eigs_r[neigs-1];
4402         lambda_min = eigs_r[0];
4403         if (pcbddc->use_coarse_estimates) {
4404           if (lambda_max>lambda_min) {
4405             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4406             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4407           }
4408         }
4409       }
4410 
4411       /* check coarse problem residual error */
4412       if (pcbddc->dbg_flag) {
4413         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4414         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4415         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4416         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4417         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4418         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4419         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4420         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4421         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4422         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4423         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4424         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4425         if (compute_eigs) {
4426           PetscReal lambda_max_s,lambda_min_s;
4427           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4428           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4429           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4430           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4431           for (i=0;i<neigs;i++) {
4432             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4433           }
4434         }
4435         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4436         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4437       }
4438       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4439       if (compute_eigs) {
4440         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4441         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4442       }
4443     }
4444   }
4445   /* print additional info */
4446   if (pcbddc->dbg_flag) {
4447     /* waits until all processes reaches this point */
4448     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4449     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4450     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4451   }
4452 
4453   /* free memory */
4454   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4455   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4456   PetscFunctionReturn(0);
4457 }
4458 
4459 #undef __FUNCT__
4460 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4461 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4462 {
4463   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4464   PC_IS*         pcis = (PC_IS*)pc->data;
4465   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4466   PetscInt       i,coarse_size;
4467   PetscInt       *local_primal_indices;
4468   PetscErrorCode ierr;
4469 
4470   PetscFunctionBegin;
4471   /* Compute global number of coarse dofs */
4472   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4473     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4474   }
4475   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4476 
4477   /* check numbering */
4478   if (pcbddc->dbg_flag) {
4479     PetscScalar coarsesum,*array;
4480     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4481 
4482     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4483     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4484     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4485     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4486     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4487     for (i=0;i<pcbddc->local_primal_size;i++) {
4488       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4489     }
4490     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4491     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4492     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4493     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4494     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4495     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4496     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4497     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4498     for (i=0;i<pcis->n;i++) {
4499       if (array[i] == 1.0) {
4500         set_error = PETSC_TRUE;
4501         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4502       }
4503     }
4504     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4505     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4506     for (i=0;i<pcis->n;i++) {
4507       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4508     }
4509     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4510     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4511     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4512     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4513     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4514     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4515     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4516       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4517       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4518       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4519       for (i=0;i<pcbddc->local_primal_size;i++) {
4520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4521       }
4522       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4523     }
4524     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4525     if (set_error_reduced) {
4526       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4527     }
4528   }
4529   /* get back data */
4530   *coarse_size_n = coarse_size;
4531   *local_primal_indices_n = local_primal_indices;
4532   PetscFunctionReturn(0);
4533 }
4534 
4535 #undef __FUNCT__
4536 #define __FUNCT__ "PCBDDCGlobalToLocal"
4537 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4538 {
4539   IS             localis_t;
4540   PetscInt       i,lsize,*idxs,n;
4541   PetscScalar    *vals;
4542   PetscErrorCode ierr;
4543 
4544   PetscFunctionBegin;
4545   /* get indices in local ordering exploiting local to global map */
4546   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4547   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4548   for (i=0;i<lsize;i++) vals[i] = 1.0;
4549   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4550   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4551   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4552   if (idxs) { /* multilevel guard */
4553     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4554   }
4555   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4556   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4557   ierr = PetscFree(vals);CHKERRQ(ierr);
4558   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4559   /* now compute set in local ordering */
4560   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4561   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4562   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4563   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4564   for (i=0,lsize=0;i<n;i++) {
4565     if (PetscRealPart(vals[i]) > 0.5) {
4566       lsize++;
4567     }
4568   }
4569   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4570   for (i=0,lsize=0;i<n;i++) {
4571     if (PetscRealPart(vals[i]) > 0.5) {
4572       idxs[lsize++] = i;
4573     }
4574   }
4575   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4576   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4577   *localis = localis_t;
4578   PetscFunctionReturn(0);
4579 }
4580 
4581 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4582 #undef __FUNCT__
4583 #define __FUNCT__ "PCBDDCMatMult_Private"
4584 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4585 {
4586   PCBDDCChange_ctx change_ctx;
4587   PetscErrorCode   ierr;
4588 
4589   PetscFunctionBegin;
4590   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4591   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4592   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4593   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4594   PetscFunctionReturn(0);
4595 }
4596 
4597 #undef __FUNCT__
4598 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4599 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4600 {
4601   PCBDDCChange_ctx change_ctx;
4602   PetscErrorCode   ierr;
4603 
4604   PetscFunctionBegin;
4605   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4606   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4607   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4608   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4609   PetscFunctionReturn(0);
4610 }
4611 
4612 #undef __FUNCT__
4613 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4614 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4615 {
4616   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4617   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4618   PetscInt            *used_xadj,*used_adjncy;
4619   PetscBool           free_used_adj;
4620   PetscErrorCode      ierr;
4621 
4622   PetscFunctionBegin;
4623   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4624   free_used_adj = PETSC_FALSE;
4625   if (pcbddc->sub_schurs_layers == -1) {
4626     used_xadj = NULL;
4627     used_adjncy = NULL;
4628   } else {
4629     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4630       used_xadj = pcbddc->mat_graph->xadj;
4631       used_adjncy = pcbddc->mat_graph->adjncy;
4632     } else if (pcbddc->computed_rowadj) {
4633       used_xadj = pcbddc->mat_graph->xadj;
4634       used_adjncy = pcbddc->mat_graph->adjncy;
4635     } else {
4636       PetscBool      flg_row=PETSC_FALSE;
4637       const PetscInt *xadj,*adjncy;
4638       PetscInt       nvtxs;
4639 
4640       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4641       if (flg_row) {
4642         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4643         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4644         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4645         free_used_adj = PETSC_TRUE;
4646       } else {
4647         pcbddc->sub_schurs_layers = -1;
4648         used_xadj = NULL;
4649         used_adjncy = NULL;
4650       }
4651       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4652     }
4653   }
4654   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4655 
4656   /* free adjacency */
4657   if (free_used_adj) {
4658     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4659   }
4660   PetscFunctionReturn(0);
4661 }
4662 
4663 #undef __FUNCT__
4664 #define __FUNCT__ "PCBDDCInitSubSchurs"
4665 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4666 {
4667   PC_IS               *pcis=(PC_IS*)pc->data;
4668   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4669   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4670   PCBDDCGraph         graph;
4671   Mat                 S_j;
4672   PetscErrorCode      ierr;
4673 
4674   PetscFunctionBegin;
4675   /* attach interface graph for determining subsets */
4676   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4677     IS verticesIS;
4678 
4679     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4680     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4681     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4682     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4683     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4684     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4685 /*
4686     if (pcbddc->dbg_flag) {
4687       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4688     }
4689 */
4690   } else {
4691     graph = pcbddc->mat_graph;
4692   }
4693 
4694   /* Create Schur complement matrix */
4695   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4696   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4697 
4698   /* sub_schurs init */
4699   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4700   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4701   /* free graph struct */
4702   if (pcbddc->sub_schurs_rebuild) {
4703     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4704   }
4705   PetscFunctionReturn(0);
4706 }
4707