xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 964fefec379abe220a6a3c61a3e1174c41937197)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31   if (!sub_schurs->is_hermitian || !sub_schurs->is_posdef) {
32     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
33   }
34 
35   if (pcbddc->dbg_flag) {
36     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
37     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
38     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
39     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
40   }
41 
42   /* max size of subsets */
43   mss = 0;
44   for (i=0;i<sub_schurs->n_subs;i++) {
45     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
46       PetscInt subset_size;
47       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
48       mss = PetscMax(mss,subset_size);
49     }
50   }
51 
52   /* min/max and threshold */
53   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
54   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
55   nmax = PetscMax(nmin,nmax);
56   if (pcbddc->adaptive_threshold > 1.0) {
57     thresh = 1.0/pcbddc->adaptive_threshold;
58   } else {
59     thresh = 1.0;
60   }
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
91 #if defined(PETSC_USE_COMPLEX)
92       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
93 #else
94       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
95 #endif
96       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
97       ierr = PetscFPTrapPop();CHKERRQ(ierr);
98     } else {
99         /* TODO */
100     }
101   } else {
102     lwork = 0;
103   }
104 
105   nv = 0;
106   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
107     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
108   }
109   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
110   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
111                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
112 #if defined(PETSC_USE_COMPLEX)
113   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
114 #endif
115   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
116   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
117                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
118                       nv+cum,&pcbddc->adaptive_constraints_idxs,
119                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
120   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
121 
122   maxneigs = 0;
123   cum = cum2 = cumarray = 0;
124   if (sub_schurs->is_Ej_com) {
125     const PetscInt *idxs;
126 
127     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
128     for (cum=0;cum<nv;cum++) {
129       pcbddc->adaptive_constraints_n[cum] = 1;
130       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
131       pcbddc->adaptive_constraints_ptrs[cum] = cum;
132       pcbddc->adaptive_constraints_data[cum] = 1.0;
133     }
134     cum2 = cum;
135     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
136   }
137 
138   if (mss) { /* multilevel */
139     if (pcbddc->use_deluxe_scaling) {
140       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
141     } else {
142       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
143     }
144     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
145   }
146 
147   if (pcbddc->dbg_flag) {
148     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
149   }
150 
151   for (i=0;i<sub_schurs->n_subs;i++) {
152     PetscInt j,subset_size;
153 
154     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
155     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
156       const PetscInt *idxs;
157       PetscScalar    one = 1.0,scalar_zero = 0.0;
158       PetscReal      zero=0.0;
159       PetscBLASInt   B_N;
160 
161       /* S should be copied since we need it for deluxe scaling */
162       if (sub_schurs->is_hermitian) {
163         PetscInt j;
164         for (j=0;j<subset_size;j++) {
165           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
166         }
167         for (j=0;j<subset_size;j++) {
168           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
169         }
170       } else {
171         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173       }
174       /* is always this the right matrix? */
175       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
176 
177       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
178       /* St = Starray+cumarray; */
179 
180       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
181         PetscBLASInt B_itype = 1;
182         PetscBLASInt B_IL = 1, B_IU;
183         PetscReal    eps = -1.0; /* dlamch? */
184         PetscInt     nmin_s;
185 
186         /* ask for eigenvalues lower than thresh */
187         if (pcbddc->dbg_flag) {
188           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d.\n",i,sub_schurs->n_subs);
189         }
190         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
191         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
192 #if defined(PETSC_USE_COMPLEX)
193         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
194 #else
195         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
196 #endif
197         ierr = PetscFPTrapPop();CHKERRQ(ierr);
198         if (B_ierr) {
199           if (B_ierr < 0 ) {
200             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
201           } else if (B_ierr <= B_N) {
202             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
203           } else {
204             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
205           }
206         }
207 
208         if (B_neigs > nmax) {
209           if (pcbddc->dbg_flag) {
210             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
211           }
212           B_neigs = nmax;
213         }
214 
215         nmin_s = PetscMin(nmin,B_N);
216         if (B_neigs < nmin_s) {
217           PetscBLASInt B_neigs2;
218 
219           B_IL = B_neigs + 1;
220           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
221           if (pcbddc->dbg_flag) {
222             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
223           }
224           if (sub_schurs->is_hermitian) {
225             PetscInt j;
226             for (j=0;j<subset_size;j++) {
227               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
228             }
229             for (j=0;j<subset_size;j++) {
230               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
231             }
232           } else {
233             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
234             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
235           }
236           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
237 #if defined(PETSC_USE_COMPLEX)
238           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
239 #else
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
241 #endif
242           ierr = PetscFPTrapPop();CHKERRQ(ierr);
243           B_neigs += B_neigs2;
244         }
245         if (B_ierr) {
246           if (B_ierr < 0 ) {
247             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
248           } else if (B_ierr <= B_N) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
250           } else {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
252           }
253         }
254         if (pcbddc->dbg_flag) {
255           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
256           for (j=0;j<B_neigs;j++) {
257             if (eigs[j] == 0.0) {
258               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
259             } else {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
261             }
262           }
263         }
264       } else {
265           /* TODO */
266       }
267       maxneigs = PetscMax(B_neigs,maxneigs);
268       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
269 
270       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
271       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
280           }
281         }
282       }
283       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
284       for (j=0;j<B_neigs;j++) {
285 #if 0
286         {
287           PetscBLASInt Blas_N,Blas_one = 1.0;
288           PetscScalar norm;
289           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
290           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
291           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
292             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
293           } else {
294             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
295           }
296           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
297         }
298 #endif
299         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
300         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
301         cum2 += subset_size;
302       }
303       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
304     }
305     /* shift for next computation */
306     cumarray += subset_size*subset_size;
307   }
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310   }
311   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
312   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
313 
314   if (mss) {
315     if (pcbddc->use_deluxe_scaling) {
316       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
317     } else {
318       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
319     }
320     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
321   }
322   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     MatType     checkmattype=MATSEQAIJ;
974     PetscReal   real_value;
975 
976     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
977     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
978     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
979     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
980     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
981     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
982     if (unsymmetric_check) {
983       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
984       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
985     }
986     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
987 
988     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
989     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
990     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
991     if (unsymmetric_check) {
992       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
993       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
994       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
995       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004     } else {
1005       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1006       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1007       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1008       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1009       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013     }
1014     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1015     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1016     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1017     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
1020 
1021     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1022     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1024     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1025     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1026     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1027     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1028     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1029     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1030     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1031     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1032     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1033     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1034     if (unsymmetric_check) {
1035       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1036       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1037     }
1038     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1039   }
1040 
1041   /* free memory */
1042   ierr = PetscFree(work);CHKERRQ(ierr);
1043   if (n_vertices) {
1044     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1045   }
1046   if (n_constraints) {
1047     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1048   }
1049   /* get back data */
1050   *coarse_submat_vals_n = coarse_submat_vals;
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1056 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1057 {
1058   Mat            *work_mat;
1059   IS             isrow_s,iscol_s;
1060   PetscBool      rsorted,csorted;
1061   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1066   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1067   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1068   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1069 
1070   if (!rsorted) {
1071     const PetscInt *idxs;
1072     PetscInt *idxs_sorted,i;
1073 
1074     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1075     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1076     for (i=0;i<rsize;i++) {
1077       idxs_perm_r[i] = i;
1078     }
1079     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1080     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1081     for (i=0;i<rsize;i++) {
1082       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1083     }
1084     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1085     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1086   } else {
1087     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1088     isrow_s = isrow;
1089   }
1090 
1091   if (!csorted) {
1092     if (isrow == iscol) {
1093       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1094       iscol_s = isrow_s;
1095     } else {
1096       const PetscInt *idxs;
1097       PetscInt *idxs_sorted,i;
1098 
1099       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1100       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1101       for (i=0;i<csize;i++) {
1102         idxs_perm_c[i] = i;
1103       }
1104       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1105       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1106       for (i=0;i<csize;i++) {
1107         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1108       }
1109       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1110       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1111     }
1112   } else {
1113     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1114     iscol_s = iscol;
1115   }
1116 
1117   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1118 
1119   if (!rsorted || !csorted) {
1120     Mat      new_mat;
1121     IS       is_perm_r,is_perm_c;
1122 
1123     if (!rsorted) {
1124       PetscInt *idxs_r,i;
1125       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1126       for (i=0;i<rsize;i++) {
1127         idxs_r[idxs_perm_r[i]] = i;
1128       }
1129       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1130       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1131     } else {
1132       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1133     }
1134     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1135 
1136     if (!csorted) {
1137       if (isrow_s == iscol_s) {
1138         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1139         is_perm_c = is_perm_r;
1140       } else {
1141         PetscInt *idxs_c,i;
1142         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1143         for (i=0;i<csize;i++) {
1144           idxs_c[idxs_perm_c[i]] = i;
1145         }
1146         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1147         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1148       }
1149     } else {
1150       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1151     }
1152     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1153 
1154     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1155     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1156     work_mat[0] = new_mat;
1157     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1158     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1159   }
1160 
1161   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1162   *B = work_mat[0];
1163   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1164   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1165   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1171 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1172 {
1173   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1174   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1175   Mat            new_mat;
1176   IS             is_local,is_global;
1177   PetscInt       local_size;
1178   PetscBool      isseqaij;
1179   PetscErrorCode ierr;
1180 
1181   PetscFunctionBegin;
1182   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1183   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1184   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1185   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1186   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1187   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1188   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1189 
1190   /* check */
1191   if (pcbddc->dbg_flag) {
1192     Vec       x,x_change;
1193     PetscReal error;
1194 
1195     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1196     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1197     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1198     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1201     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1204     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1205     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1206     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1207     ierr = VecDestroy(&x);CHKERRQ(ierr);
1208     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1209   }
1210 
1211   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1212   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1213   if (isseqaij) {
1214     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1215   } else {
1216     Mat work_mat;
1217     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1218     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1219     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1220   }
1221   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1222   /*
1223   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1224   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1225   */
1226   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1232 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1233 {
1234   PC_IS*         pcis = (PC_IS*)(pc->data);
1235   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1236   IS             is_aux1,is_aux2;
1237   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1238   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1239   PetscInt       vbs,bs;
1240   PetscBT        bitmask;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   /*
1245     No need to setup local scatters if
1246       - primal space is unchanged
1247         AND
1248       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1249         AND
1250       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1251   */
1252   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1253     PetscFunctionReturn(0);
1254   }
1255   /* destroy old objects */
1256   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1257   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1258   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1259   /* Set Non-overlapping dimensions */
1260   n_B = pcis->n_B; n_D = pcis->n - n_B;
1261   n_vertices = pcbddc->n_actual_vertices;
1262   /* create auxiliary bitmask */
1263   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1264   for (i=0;i<n_vertices;i++) {
1265     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1266   }
1267 
1268   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1269   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1270   for (i=0, n_R=0; i<pcis->n; i++) {
1271     if (!PetscBTLookup(bitmask,i)) {
1272       idx_R_local[n_R] = i;
1273       n_R++;
1274     }
1275   }
1276 
1277   /* Block code */
1278   vbs = 1;
1279   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1280   if (bs>1 && !(n_vertices%bs)) {
1281     PetscBool is_blocked = PETSC_TRUE;
1282     PetscInt  *vary;
1283     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1284     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1285     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1286     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1287     for (i=0; i<n_vertices/bs; i++) {
1288       if (vary[i]!=0 && vary[i]!=bs) {
1289         is_blocked = PETSC_FALSE;
1290         break;
1291       }
1292     }
1293     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1294       vbs = bs;
1295       for (i=0;i<n_R/vbs;i++) {
1296         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1297       }
1298     }
1299     ierr = PetscFree(vary);CHKERRQ(ierr);
1300   }
1301   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1302   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1303 
1304   /* print some info if requested */
1305   if (pcbddc->dbg_flag) {
1306     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1307     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1308     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1309     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1310     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1311     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1312     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1313     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1314   }
1315 
1316   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1317   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1318   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1319   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1320   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1321   for (i=0; i<n_D; i++) {
1322     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1323   }
1324   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   for (i=0, j=0; i<n_R; i++) {
1326     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1327       aux_array1[j++] = i;
1328     }
1329   }
1330   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1331   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1332   for (i=0, j=0; i<n_B; i++) {
1333     if (!PetscBTLookup(bitmask,is_indices[i])) {
1334       aux_array2[j++] = i;
1335     }
1336   }
1337   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1338   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1339   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1340   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1341   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1342 
1343   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1344     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1345     for (i=0, j=0; i<n_R; i++) {
1346       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1347         aux_array1[j++] = i;
1348       }
1349     }
1350     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1352     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1353   }
1354   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1355   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1362 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1363 {
1364   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1365   PC_IS          *pcis = (PC_IS*)pc->data;
1366   PC             pc_temp;
1367   Mat            A_RR;
1368   MatReuse       reuse;
1369   PetscScalar    m_one = -1.0;
1370   PetscReal      value;
1371   PetscInt       n_D,n_R,ibs,mbs;
1372   PetscBool      use_exact,use_exact_reduced,issbaij;
1373   PetscErrorCode ierr;
1374   /* prefixes stuff */
1375   char           dir_prefix[256],neu_prefix[256],str_level[16];
1376   size_t         len;
1377 
1378   PetscFunctionBegin;
1379 
1380   /* compute prefixes */
1381   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1382   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1383   if (!pcbddc->current_level) {
1384     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1385     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1386     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1387     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1388   } else {
1389     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1390     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1391     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1392     len -= 15; /* remove "pc_bddc_coarse_" */
1393     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1394     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1395     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1396     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1397     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1398     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1399     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1400     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1401   }
1402 
1403   /* DIRICHLET PROBLEM */
1404   if (dirichlet) {
1405     if (pcbddc->issym) {
1406       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1407     }
1408     /* Matrix for Dirichlet problem is pcis->A_II */
1409     n_D = pcis->n - pcis->n_B;
1410     if (!pcbddc->ksp_D) { /* create object if not yet build */
1411       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1412       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1413       /* default */
1414       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1415       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1416       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1417       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1418       if (issbaij) {
1419         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1420       } else {
1421         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1422       }
1423       /* Allow user's customization */
1424       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1425       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1428     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1429     if (!n_D) {
1430       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1431       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1432     }
1433     /* Set Up KSP for Dirichlet problem of BDDC */
1434     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1435     /* set ksp_D into pcis data */
1436     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1437     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1438     pcis->ksp_D = pcbddc->ksp_D;
1439   }
1440 
1441   /* NEUMANN PROBLEM */
1442   A_RR = 0;
1443   if (neumann) {
1444     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1445     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1446     if (pcbddc->ksp_R) { /* already created ksp */
1447       PetscInt nn_R;
1448       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1449       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1450       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1451       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1452         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1453         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1454         reuse = MAT_INITIAL_MATRIX;
1455       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1456         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1457           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1458           reuse = MAT_INITIAL_MATRIX;
1459         } else { /* safe to reuse the matrix */
1460           reuse = MAT_REUSE_MATRIX;
1461         }
1462       }
1463       /* last check */
1464       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1465         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1466         reuse = MAT_INITIAL_MATRIX;
1467       }
1468     } else { /* first time, so we need to create the matrix */
1469       reuse = MAT_INITIAL_MATRIX;
1470     }
1471     /* extract A_RR */
1472     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1473     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1474     if (ibs != mbs) {
1475       Mat newmat;
1476       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1477       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1478       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1479     } else {
1480       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1481     }
1482     if (pcbddc->issym) {
1483       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1484     }
1485     if (!pcbddc->ksp_R) { /* create object if not present */
1486       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1487       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1488       /* default */
1489       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1490       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1491       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1492       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1493       if (issbaij) {
1494         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1495       } else {
1496         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1497       }
1498       /* Allow user's customization */
1499       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1500       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1501     }
1502     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1503     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1504     if (!n_R) {
1505       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1506       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1507     }
1508     /* Set Up KSP for Neumann problem of BDDC */
1509     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1510   }
1511 
1512   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1513   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1514     if (pcbddc->dbg_flag) {
1515       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1516       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1517       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1518     }
1519     if (dirichlet) { /* Dirichlet */
1520       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1521       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1522       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1523       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1524       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1525       /* need to be adapted? */
1526       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1527       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1528       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1529       /* print info */
1530       if (pcbddc->dbg_flag) {
1531         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1532         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1533       }
1534       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1535         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1536       }
1537     }
1538     if (neumann) { /* Neumann */
1539       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1540       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1541       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1542       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1543       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1544       /* need to be adapted? */
1545       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1546       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1547       /* print info */
1548       if (pcbddc->dbg_flag) {
1549         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1550         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1551       }
1552       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1553         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1554       }
1555     }
1556   }
1557   /* free Neumann problem's matrix */
1558   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1564 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1565 {
1566   PetscErrorCode ierr;
1567   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1568 
1569   PetscFunctionBegin;
1570   if (applytranspose) {
1571     if (pcbddc->local_auxmat1) {
1572       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1573       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1574     }
1575     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1576   } else {
1577     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1578     if (pcbddc->local_auxmat1) {
1579       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1580       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1581     }
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1587 #undef __FUNCT__
1588 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1589 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1590 {
1591   PetscErrorCode ierr;
1592   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1593   PC_IS*            pcis = (PC_IS*)  (pc->data);
1594   const PetscScalar zero = 0.0;
1595 
1596   PetscFunctionBegin;
1597   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1598   if (applytranspose) {
1599     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1600     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1601   } else {
1602     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1603     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1604   }
1605   /* start communications from local primal nodes to rhs of coarse solver */
1606   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1607   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1611   /* TODO remove null space when doing multilevel */
1612   if (pcbddc->coarse_ksp) {
1613     Vec rhs,sol;
1614 
1615     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1616     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1617     if (applytranspose) {
1618       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1619     } else {
1620       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1621     }
1622   }
1623 
1624   /* Local solution on R nodes */
1625   if (pcis->n) {
1626     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1627     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1628     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1629     if (pcbddc->switch_static) {
1630       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1631       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1632     }
1633     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1634     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1635     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1636     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1637     if (pcbddc->switch_static) {
1638       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1639       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640     }
1641   }
1642 
1643   /* communications from coarse sol to local primal nodes */
1644   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1645   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1646 
1647   /* Sum contributions from two levels */
1648   if (applytranspose) {
1649     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1650     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1651   } else {
1652     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1660 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1661 {
1662   PetscErrorCode ierr;
1663   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1664   PetscScalar    *array;
1665   Vec            from,to;
1666 
1667   PetscFunctionBegin;
1668   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1669     from = pcbddc->coarse_vec;
1670     to = pcbddc->vec1_P;
1671     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1672       Vec tvec;
1673 
1674       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1675       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1676       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1677       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1678       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1679       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1680     }
1681   } else { /* from local to global -> put data in coarse right hand side */
1682     from = pcbddc->vec1_P;
1683     to = pcbddc->coarse_vec;
1684   }
1685   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1691 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1692 {
1693   PetscErrorCode ierr;
1694   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1695   PetscScalar    *array;
1696   Vec            from,to;
1697 
1698   PetscFunctionBegin;
1699   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1700     from = pcbddc->coarse_vec;
1701     to = pcbddc->vec1_P;
1702   } else { /* from local to global -> put data in coarse right hand side */
1703     from = pcbddc->vec1_P;
1704     to = pcbddc->coarse_vec;
1705   }
1706   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1707   if (smode == SCATTER_FORWARD) {
1708     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1709       Vec tvec;
1710 
1711       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1712       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1713       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1714       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1715     }
1716   } else {
1717     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1718      ierr = VecResetArray(from);CHKERRQ(ierr);
1719     }
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 /* uncomment for testing purposes */
1725 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1726 #undef __FUNCT__
1727 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1728 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1729 {
1730   PetscErrorCode    ierr;
1731   PC_IS*            pcis = (PC_IS*)(pc->data);
1732   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1733   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1734   /* one and zero */
1735   PetscScalar       one=1.0,zero=0.0;
1736   /* space to store constraints and their local indices */
1737   PetscScalar       *temp_quadrature_constraint;
1738   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1739   /* iterators */
1740   PetscInt          i,j,k,total_counts,temp_start_ptr;
1741   /* BLAS integers */
1742   PetscBLASInt      lwork,lierr;
1743   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1744   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1745   /* reuse */
1746   PetscInt          olocal_primal_size;
1747   PetscInt          *oprimal_indices_local_idxs;
1748   /* change of basis */
1749   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1750   PetscBool         boolforchange,qr_needed;
1751   PetscBT           touched,change_basis,qr_needed_idx;
1752   /* auxiliary stuff */
1753   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1754   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1755   PetscScalar       *temp_quadrature_constraint_work=NULL;
1756   /* some quantities */
1757   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1758   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1759 
1760   PetscFunctionBegin;
1761   /* Destroy Mat objects computed previously */
1762   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1763   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1764 
1765   /* print some info */
1766   if (pcbddc->dbg_flag) {
1767     IS       vertices;
1768     PetscInt nv,nedges,nfaces;
1769     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1770     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1771     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1772     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1773     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1774     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1775     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1776     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1777     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1778   }
1779 
1780   if (!pcbddc->adaptive_selection) {
1781     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1782     MatNullSpace nearnullsp;
1783     const Vec    *nearnullvecs;
1784     Vec          *localnearnullsp;
1785     PetscScalar  *array;
1786     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1787     PetscBool    nnsp_has_cnst;
1788     /* LAPACK working arrays for SVD or POD */
1789     PetscBool    skip_lapack;
1790     PetscScalar  *work;
1791     PetscReal    *singular_vals;
1792 #if defined(PETSC_USE_COMPLEX)
1793     PetscReal    *rwork;
1794 #endif
1795 #if defined(PETSC_MISSING_LAPACK_GESVD)
1796     PetscScalar  *temp_basis,*correlation_mat;
1797 #else
1798     PetscBLASInt dummy_int=1;
1799     PetscScalar  dummy_scalar=1.;
1800 #endif
1801 
1802     /* Get index sets for faces, edges and vertices from graph */
1803     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1804     /* free unneeded index sets */
1805     if (!pcbddc->use_vertices) {
1806       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1807     }
1808     if (!pcbddc->use_edges) {
1809       for (i=0;i<n_ISForEdges;i++) {
1810         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1811       }
1812       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1813       n_ISForEdges = 0;
1814     }
1815     if (!pcbddc->use_faces) {
1816       for (i=0;i<n_ISForFaces;i++) {
1817         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1818       }
1819       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1820       n_ISForFaces = 0;
1821     }
1822     /* HACKS (the following two blocks of code) */
1823     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1824       pcbddc->use_change_of_basis = PETSC_TRUE;
1825       if (!ISForEdges) {
1826         pcbddc->use_change_on_faces = PETSC_TRUE;
1827       }
1828     }
1829     if (pcbddc->NullSpace) {
1830       /* use_change_of_basis should be consistent among processors */
1831       PetscBool tbool[2],gbool[2];
1832       tbool [0] = pcbddc->use_change_of_basis;
1833       tbool [1] = pcbddc->use_change_on_faces;
1834       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1835       pcbddc->use_change_of_basis = gbool[0];
1836       pcbddc->use_change_on_faces = gbool[1];
1837     }
1838 
1839     /* check if near null space is attached to global mat */
1840     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1841     if (nearnullsp) {
1842       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1843       /* remove any stored info */
1844       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1845       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1846       /* store information for BDDC solver reuse */
1847       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1848       pcbddc->onearnullspace = nearnullsp;
1849       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1850       for (i=0;i<nnsp_size;i++) {
1851         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1852       }
1853     } else { /* if near null space is not provided BDDC uses constants by default */
1854       nnsp_size = 0;
1855       nnsp_has_cnst = PETSC_TRUE;
1856     }
1857     /* get max number of constraints on a single cc */
1858     max_constraints = nnsp_size;
1859     if (nnsp_has_cnst) max_constraints++;
1860 
1861     /*
1862          Evaluate maximum storage size needed by the procedure
1863          - temp_indices will contain start index of each constraint stored as follows
1864          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1865          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1866          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1867                                                                                                                                                            */
1868     total_counts = n_ISForFaces+n_ISForEdges;
1869     total_counts *= max_constraints;
1870     n_vertices = 0;
1871     if (ISForVertices) {
1872       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1873     }
1874     total_counts += n_vertices;
1875     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1876     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1877     total_counts = 0;
1878     max_size_of_constraint = 0;
1879     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1880       if (i<n_ISForEdges) {
1881         used_IS = &ISForEdges[i];
1882       } else {
1883         used_IS = &ISForFaces[i-n_ISForEdges];
1884       }
1885       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1886       total_counts += j;
1887       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1888     }
1889     total_counts *= max_constraints;
1890     total_counts += n_vertices;
1891     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1892     /* get local part of global near null space vectors */
1893     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1894     for (k=0;k<nnsp_size;k++) {
1895       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1896       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1897       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1898     }
1899 
1900     /* whether or not to skip lapack calls */
1901     skip_lapack = PETSC_TRUE;
1902     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1903 
1904     /* allocate some auxiliary stuff */
1905     if (!skip_lapack || pcbddc->use_qr_single) {
1906       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1907     } else {
1908       gidxs = NULL;
1909       permutation = NULL;
1910       temp_indices_to_constraint_work = NULL;
1911       temp_quadrature_constraint_work = NULL;
1912     }
1913 
1914     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1915     if (!skip_lapack) {
1916       PetscScalar temp_work;
1917 
1918 #if defined(PETSC_MISSING_LAPACK_GESVD)
1919       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1920       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1921       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1922       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1923 #if defined(PETSC_USE_COMPLEX)
1924       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1925 #endif
1926       /* now we evaluate the optimal workspace using query with lwork=-1 */
1927       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1928       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1929       lwork = -1;
1930       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1931 #if !defined(PETSC_USE_COMPLEX)
1932       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1933 #else
1934       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1935 #endif
1936       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1937       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1938 #else /* on missing GESVD */
1939       /* SVD */
1940       PetscInt max_n,min_n;
1941       max_n = max_size_of_constraint;
1942       min_n = max_constraints;
1943       if (max_size_of_constraint < max_constraints) {
1944         min_n = max_size_of_constraint;
1945         max_n = max_constraints;
1946       }
1947       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1948 #if defined(PETSC_USE_COMPLEX)
1949       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1950 #endif
1951       /* now we evaluate the optimal workspace using query with lwork=-1 */
1952       lwork = -1;
1953       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1954       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1955       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1956       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1957 #if !defined(PETSC_USE_COMPLEX)
1958       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1959 #else
1960       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1961 #endif
1962       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1963       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1964 #endif /* on missing GESVD */
1965       /* Allocate optimal workspace */
1966       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1967       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1968     }
1969     /* Now we can loop on constraining sets */
1970     total_counts = 0;
1971     temp_indices[0] = 0;
1972     /* vertices */
1973     if (ISForVertices) {
1974       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1975       if (nnsp_has_cnst) { /* consider all vertices */
1976         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1977         for (i=0;i<n_vertices;i++) {
1978           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1979           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1980           total_counts++;
1981         }
1982       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1983         PetscBool used_vertex;
1984         for (i=0;i<n_vertices;i++) {
1985           used_vertex = PETSC_FALSE;
1986           k = 0;
1987           while (!used_vertex && k<nnsp_size) {
1988             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1989             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1990               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1991               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1992               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1993               total_counts++;
1994               used_vertex = PETSC_TRUE;
1995             }
1996             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1997             k++;
1998           }
1999         }
2000       }
2001       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2002       n_vertices = total_counts;
2003     }
2004 
2005     /* edges and faces */
2006     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2007       if (ncc<n_ISForEdges) {
2008         used_IS = &ISForEdges[ncc];
2009         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2010       } else {
2011         used_IS = &ISForFaces[ncc-n_ISForEdges];
2012         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2013       }
2014       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2015       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2016       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2017       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2018       /* change of basis should not be performed on local periodic nodes */
2019       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2020       if (nnsp_has_cnst) {
2021         PetscScalar quad_value;
2022         temp_constraints++;
2023         if (!pcbddc->use_nnsp_true) {
2024           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2025         } else {
2026           quad_value = 1.0;
2027         }
2028         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2029         for (j=0;j<size_of_constraint;j++) {
2030           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2031         }
2032         /* sort by global ordering if using lapack subroutines (not needed!) */
2033         if (!skip_lapack || pcbddc->use_qr_single) {
2034           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2035           for (j=0;j<size_of_constraint;j++) {
2036             permutation[j]=j;
2037           }
2038           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2039           for (j=0;j<size_of_constraint;j++) {
2040             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2041           }
2042           for (j=0;j<size_of_constraint;j++) {
2043             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2044             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2045           }
2046           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2047           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2048         }
2049         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2050         total_counts++;
2051       }
2052       for (k=0;k<nnsp_size;k++) {
2053         PetscReal real_value;
2054         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2055         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2056         for (j=0;j<size_of_constraint;j++) {
2057           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2058         }
2059         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2060         /* check if array is null on the connected component */
2061         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2062         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2063         if (real_value > 0.0) { /* keep indices and values */
2064           /* sort by global ordering if using lapack subroutines */
2065           if (!skip_lapack || pcbddc->use_qr_single) {
2066             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2067             for (j=0;j<size_of_constraint;j++) {
2068               permutation[j]=j;
2069             }
2070             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2071             for (j=0;j<size_of_constraint;j++) {
2072               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2073               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2074             }
2075             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2076             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2077           }
2078           temp_constraints++;
2079           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2080           total_counts++;
2081         }
2082       }
2083       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2084       valid_constraints = temp_constraints;
2085       if (!pcbddc->use_nnsp_true && temp_constraints) {
2086         if (temp_constraints == 1) { /* just normalize the constraint */
2087           PetscScalar norm;
2088           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2089           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2090           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2091           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2092         } else { /* perform SVD */
2093           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2094 
2095 #if defined(PETSC_MISSING_LAPACK_GESVD)
2096           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2097              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2098              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2099                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2100                 from that computed using LAPACKgesvd
2101              -> This is due to a different computation of eigenvectors in LAPACKheev
2102              -> The quality of the POD-computed basis will be the same */
2103           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2104           /* Store upper triangular part of correlation matrix */
2105           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2106           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2107           for (j=0;j<temp_constraints;j++) {
2108             for (k=0;k<j+1;k++) {
2109               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2110             }
2111           }
2112           /* compute eigenvalues and eigenvectors of correlation matrix */
2113           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2115 #if !defined(PETSC_USE_COMPLEX)
2116           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2117 #else
2118           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2119 #endif
2120           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2121           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2122           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2123           j = 0;
2124           while (j < temp_constraints && singular_vals[j] < tol) j++;
2125           total_counts = total_counts-j;
2126           valid_constraints = temp_constraints-j;
2127           /* scale and copy POD basis into used quadrature memory */
2128           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2129           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2130           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2131           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2132           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2133           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2134           if (j<temp_constraints) {
2135             PetscInt ii;
2136             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2137             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2138             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2139             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2140             for (k=0;k<temp_constraints-j;k++) {
2141               for (ii=0;ii<size_of_constraint;ii++) {
2142                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2143               }
2144             }
2145           }
2146 #else  /* on missing GESVD */
2147           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2148           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2149           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2150           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2151 #if !defined(PETSC_USE_COMPLEX)
2152           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2153 #else
2154           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2155 #endif
2156           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2157           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2158           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2159           k = temp_constraints;
2160           if (k > size_of_constraint) k = size_of_constraint;
2161           j = 0;
2162           while (j < k && singular_vals[k-j-1] < tol) j++;
2163           valid_constraints = k-j;
2164           total_counts = total_counts-temp_constraints+valid_constraints;
2165 #endif /* on missing GESVD */
2166         }
2167       }
2168       /* setting change_of_basis flag is safe now */
2169       if (boolforchange) {
2170         for (j=0;j<valid_constraints;j++) {
2171           PetscBTSet(change_basis,total_counts-j-1);
2172         }
2173       }
2174     }
2175     /* free workspace */
2176     if (!skip_lapack || pcbddc->use_qr_single) {
2177       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2178     }
2179     if (!skip_lapack) {
2180       ierr = PetscFree(work);CHKERRQ(ierr);
2181 #if defined(PETSC_USE_COMPLEX)
2182       ierr = PetscFree(rwork);CHKERRQ(ierr);
2183 #endif
2184       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2185 #if defined(PETSC_MISSING_LAPACK_GESVD)
2186       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2187       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2188 #endif
2189     }
2190     for (k=0;k<nnsp_size;k++) {
2191       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2192     }
2193     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2194     /* free index sets of faces, edges and vertices */
2195     for (i=0;i<n_ISForFaces;i++) {
2196       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2197     }
2198     if (n_ISForFaces) {
2199       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2200     }
2201     for (i=0;i<n_ISForEdges;i++) {
2202       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2203     }
2204     if (n_ISForEdges) {
2205       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2206     }
2207     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2208   } else {
2209     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2210     PetscInt        cum = 0;
2211 
2212     total_counts = 0;
2213     n_vertices = 0;
2214     if (sub_schurs->is_Ej_com) {
2215       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2216     }
2217     max_constraints = 0;
2218     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2219       total_counts += pcbddc->adaptive_constraints_n[i];
2220       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2221     }
2222     temp_indices = pcbddc->adaptive_constraints_ptrs;
2223     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2224     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2225 
2226 #if 0
2227     printf("Found %d totals\n",total_counts);
2228     for (i=0;i<total_counts;i++) {
2229       printf("const %d, start %d",i,temp_indices[i]);
2230       printf(" end %d:\n",temp_indices[i+1]);
2231       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2232         printf("  idxs %d",temp_indices_to_constraint[j]);
2233         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2234       }
2235     }
2236     for (i=0;i<n_vertices;i++) {
2237       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2238     }
2239     for (i=0;i<sub_schurs->n_subs;i++) {
2240       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2241     }
2242 #endif
2243 
2244     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2245     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2246     /* Change of basis */
2247     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2248     if (pcbddc->use_change_of_basis) {
2249       cum = n_vertices;
2250       for (i=0;i<sub_schurs->n_subs;i++) {
2251         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2252           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2253             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2254           }
2255         }
2256         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2257       }
2258     }
2259   }
2260 
2261   /* map temp_indices_to_constraint in boundary numbering */
2262   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2263   if (i != temp_indices[total_counts]) {
2264     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2265   }
2266 
2267   /* set quantities in pcbddc data structure and store previous primal size */
2268   /* n_vertices defines the number of subdomain corners in the primal space */
2269   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2270   olocal_primal_size = pcbddc->local_primal_size;
2271   pcbddc->local_primal_size = total_counts;
2272   pcbddc->n_vertices = n_vertices;
2273   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2274 
2275   /* Create constraint matrix */
2276   /* The constraint matrix is used to compute the l2g map of primal dofs */
2277   /* so we need to set it up properly either with or without change of basis */
2278   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2279   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2280   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2281   /* array to compute a local numbering of constraints : vertices first then constraints */
2282   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2283   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2284   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2285   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2286   /* auxiliary stuff for basis change */
2287   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2288   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2289 
2290   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2291   total_primal_vertices=0;
2292   for (i=0;i<pcbddc->local_primal_size;i++) {
2293     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2294     if (size_of_constraint == 1) {
2295       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2296       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2297       aux_primal_minloc[total_primal_vertices]=0;
2298       total_primal_vertices++;
2299     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2300       PetscInt min_loc,min_index;
2301       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2302       /* find first untouched local node */
2303       k = 0;
2304       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2305       min_index = global_indices[k];
2306       min_loc = k;
2307       /* search the minimum among global nodes already untouched on the cc */
2308       for (k=1;k<size_of_constraint;k++) {
2309         /* there can be more than one constraint on a single connected component */
2310         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2311           min_index = global_indices[k];
2312           min_loc = k;
2313         }
2314       }
2315       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2316       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2317       aux_primal_minloc[total_primal_vertices]=min_loc;
2318       total_primal_vertices++;
2319     }
2320   }
2321   /* determine if a QR strategy is needed for change of basis */
2322   qr_needed = PETSC_FALSE;
2323   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2324   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2325     if (PetscBTLookup(change_basis,i)) {
2326       if (!pcbddc->use_qr_single) {
2327         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2328         j = 0;
2329         for (k=0;k<size_of_constraint;k++) {
2330           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2331             j++;
2332           }
2333         }
2334         /* found more than one primal dof on the cc */
2335         if (j > 1) {
2336           PetscBTSet(qr_needed_idx,i);
2337           qr_needed = PETSC_TRUE;
2338         }
2339       } else {
2340         PetscBTSet(qr_needed_idx,i);
2341         qr_needed = PETSC_TRUE;
2342       }
2343     }
2344   }
2345   /* free workspace */
2346   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2347 
2348   /* permute indices in order to have a sorted set of vertices */
2349   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2350 
2351   /* nonzero structure of constraint matrix */
2352   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2353   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2354   j=total_primal_vertices;
2355   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2356     if (!PetscBTLookup(change_basis,i)) {
2357       nnz[j]=temp_indices[i+1]-temp_indices[i];
2358       j++;
2359     }
2360   }
2361   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2362   ierr = PetscFree(nnz);CHKERRQ(ierr);
2363   /* set values in constraint matrix */
2364   for (i=0;i<total_primal_vertices;i++) {
2365     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2366   }
2367   total_counts = total_primal_vertices;
2368   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2369     if (!PetscBTLookup(change_basis,i)) {
2370       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2371       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2372       total_counts++;
2373     }
2374   }
2375   /* assembling */
2376   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378   /*
2379   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2380   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2381   */
2382   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2383   if (pcbddc->use_change_of_basis) {
2384     /* dual and primal dofs on a single cc */
2385     PetscInt     dual_dofs,primal_dofs;
2386     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2387     PetscInt     primal_counter;
2388     /* working stuff for GEQRF */
2389     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2390     PetscBLASInt lqr_work;
2391     /* working stuff for UNGQR */
2392     PetscScalar  *gqr_work,lgqr_work_t;
2393     PetscBLASInt lgqr_work;
2394     /* working stuff for TRTRS */
2395     PetscScalar  *trs_rhs;
2396     PetscBLASInt Blas_NRHS;
2397     /* pointers for values insertion into change of basis matrix */
2398     PetscInt     *start_rows,*start_cols;
2399     PetscScalar  *start_vals;
2400     /* working stuff for values insertion */
2401     PetscBT      is_primal;
2402     /* matrix sizes */
2403     PetscInt     global_size,local_size;
2404     /* temporary change of basis */
2405     Mat          localChangeOfBasisMatrix;
2406     /* extra space for debugging */
2407     PetscScalar  *dbg_work;
2408 
2409     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2410     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2411     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2412     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2413     /* nonzeros for local mat */
2414     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2415     for (i=0;i<pcis->n;i++) nnz[i]=1;
2416     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2417       if (PetscBTLookup(change_basis,i)) {
2418         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2419         if (PetscBTLookup(qr_needed_idx,i)) {
2420           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2421         } else {
2422           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2423           /* get local primal index on the cc */
2424           j = 0;
2425           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2426           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2427         }
2428       }
2429     }
2430     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2431     ierr = PetscFree(nnz);CHKERRQ(ierr);
2432     /* Set initial identity in the matrix */
2433     for (i=0;i<pcis->n;i++) {
2434       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2435     }
2436 
2437     if (pcbddc->dbg_flag) {
2438       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2439       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2440     }
2441 
2442 
2443     /* Now we loop on the constraints which need a change of basis */
2444     /*
2445        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2446        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2447 
2448        Basic blocks of change of basis matrix T computed by
2449 
2450           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2451 
2452             | 1        0   ...        0         s_1/S |
2453             | 0        1   ...        0         s_2/S |
2454             |              ...                        |
2455             | 0        ...            1     s_{n-1}/S |
2456             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2457 
2458             with S = \sum_{i=1}^n s_i^2
2459             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2460                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2461 
2462           - QR decomposition of constraints otherwise
2463     */
2464     if (qr_needed) {
2465       /* space to store Q */
2466       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2467       /* first we issue queries for optimal work */
2468       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2469       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2470       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2471       lqr_work = -1;
2472       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2473       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2474       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2475       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2476       lgqr_work = -1;
2477       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2478       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2479       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2480       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2481       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2482       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2483       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2484       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2485       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2486       /* array to store scaling factors for reflectors */
2487       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2488       /* array to store rhs and solution of triangular solver */
2489       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2490       /* allocating workspace for check */
2491       if (pcbddc->dbg_flag) {
2492         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2493       }
2494     }
2495     /* array to store whether a node is primal or not */
2496     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2497     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2498     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2499     if (i != total_primal_vertices) {
2500       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2501     }
2502     for (i=0;i<total_primal_vertices;i++) {
2503       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2504     }
2505     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2506 
2507     /* loop on constraints and see whether or not they need a change of basis and compute it */
2508     /* -> using implicit ordering contained in temp_indices data */
2509     total_counts = pcbddc->n_vertices;
2510     primal_counter = total_counts;
2511     while (total_counts<pcbddc->local_primal_size) {
2512       primal_dofs = 1;
2513       if (PetscBTLookup(change_basis,total_counts)) {
2514         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2515         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2516           primal_dofs++;
2517         }
2518         /* get constraint info */
2519         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2520         dual_dofs = size_of_constraint-primal_dofs;
2521 
2522         if (pcbddc->dbg_flag) {
2523           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2524         }
2525 
2526         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2527 
2528           /* copy quadrature constraints for change of basis check */
2529           if (pcbddc->dbg_flag) {
2530             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2531           }
2532           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2533           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2534 
2535           /* compute QR decomposition of constraints */
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2538           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2539           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2540           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2541           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2542           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2543 
2544           /* explictly compute R^-T */
2545           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2546           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2547           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2548           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2549           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2550           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2551           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2552           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2553           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2554           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2555 
2556           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2557           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2558           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2559           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2560           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2561           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2562           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2563           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2564           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2565 
2566           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2567              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2568              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2569           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2570           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2571           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2572           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2573           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2575           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2576           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2577           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2578           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2579 
2580           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2581           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2582           /* insert cols for primal dofs */
2583           for (j=0;j<primal_dofs;j++) {
2584             start_vals = &qr_basis[j*size_of_constraint];
2585             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2586             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2587           }
2588           /* insert cols for dual dofs */
2589           for (j=0,k=0;j<dual_dofs;k++) {
2590             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2591               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2592               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2593               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2594               j++;
2595             }
2596           }
2597 
2598           /* check change of basis */
2599           if (pcbddc->dbg_flag) {
2600             PetscInt   ii,jj;
2601             PetscBool valid_qr=PETSC_TRUE;
2602             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2603             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2604             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2605             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2606             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2607             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2608             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2609             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2610             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2611             for (jj=0;jj<size_of_constraint;jj++) {
2612               for (ii=0;ii<primal_dofs;ii++) {
2613                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2614                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2615               }
2616             }
2617             if (!valid_qr) {
2618               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2619               for (jj=0;jj<size_of_constraint;jj++) {
2620                 for (ii=0;ii<primal_dofs;ii++) {
2621                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2622                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2623                   }
2624                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2625                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2626                   }
2627                 }
2628               }
2629             } else {
2630               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2631             }
2632           }
2633         } else { /* simple transformation block */
2634           PetscInt    row,col;
2635           PetscScalar val,norm;
2636 
2637           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2638           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2639           for (j=0;j<size_of_constraint;j++) {
2640             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2641             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2642             if (!PetscBTLookup(is_primal,row_B)) {
2643               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2644               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2645               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2646             } else {
2647               for (k=0;k<size_of_constraint;k++) {
2648                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2649                 if (row != col) {
2650                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2651                 } else {
2652                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2653                 }
2654                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2655               }
2656             }
2657           }
2658           if (pcbddc->dbg_flag) {
2659             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2660           }
2661         }
2662         /* increment primal counter */
2663         primal_counter += primal_dofs;
2664       } else {
2665         if (pcbddc->dbg_flag) {
2666           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2667         }
2668       }
2669       /* increment constraint counter total_counts */
2670       total_counts += primal_dofs;
2671     }
2672 
2673     /* free workspace */
2674     if (qr_needed) {
2675       if (pcbddc->dbg_flag) {
2676         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2677       }
2678       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2679       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2680       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2681       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2682       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2683     }
2684     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2685     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2687 
2688     /* assembling of global change of variable */
2689     {
2690       Mat      tmat;
2691       PetscInt bs;
2692 
2693       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2694       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2695       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2696       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2697       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2698       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2699       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2700       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2701       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2702       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2703       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2704       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2705       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2706       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2707       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2708       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2709       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2710       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2711     }
2712     /* check */
2713     if (pcbddc->dbg_flag) {
2714       PetscReal error;
2715       Vec       x,x_change;
2716 
2717       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2718       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2719       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2720       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2721       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2724       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2726       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2727       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2728       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2729       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2730       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2731       ierr = VecDestroy(&x);CHKERRQ(ierr);
2732       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2733     }
2734 
2735     /* adapt sub_schurs computed (if any) */
2736     if (pcbddc->use_deluxe_scaling) {
2737       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2738       if (sub_schurs->n_subs_par_g) {
2739         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2740       }
2741       if (sub_schurs->S_Ej_all) {
2742         Mat S_1,S_2,tmat;
2743         IS is_all_N;
2744 
2745         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2746         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2747         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2748         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2749         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2750         sub_schurs->S_Ej_all = S_1;
2751         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2752         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2753         sub_schurs->sum_S_Ej_all = S_2;
2754         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2755       }
2756     }
2757     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2758   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2759     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2760     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2761   }
2762 
2763   /* set up change of basis context */
2764   if (pcbddc->ChangeOfBasisMatrix) {
2765     PCBDDCChange_ctx change_ctx;
2766 
2767     if (!pcbddc->new_global_mat) {
2768       PetscInt global_size,local_size;
2769 
2770       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2771       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2772       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2773       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2774       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2775       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2776       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2777       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2778       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2779     } else {
2780       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2781       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2782       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2783     }
2784     if (!pcbddc->user_ChangeOfBasisMatrix) {
2785       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2786       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2787     } else {
2788       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2789       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2790     }
2791     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2792     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2793     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2794     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2795   }
2796 
2797   /* get indices in local ordering for vertices and constraints */
2798   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2799     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2800     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2801   }
2802   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2803   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2804   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2805   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2806   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2807   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2808   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2809   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2810   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2811   /* set quantities in PCBDDC data struct */
2812   pcbddc->n_actual_vertices = i;
2813   /* check if a new primal space has been introduced */
2814   pcbddc->new_primal_space_local = PETSC_TRUE;
2815   if (olocal_primal_size == pcbddc->local_primal_size) {
2816     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2817     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2818     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2819   }
2820   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2821   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2822 
2823   /* flush dbg viewer */
2824   if (pcbddc->dbg_flag) {
2825     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2826   }
2827 
2828   /* free workspace */
2829   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2830   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2831   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2832   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2833   if (!pcbddc->adaptive_selection) {
2834     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2835     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2836   } else {
2837     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2838                       pcbddc->adaptive_constraints_ptrs,
2839                       pcbddc->adaptive_constraints_idxs,
2840                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2841     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2842   }
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2848 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2849 {
2850   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2851   PC_IS       *pcis = (PC_IS*)pc->data;
2852   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2853   PetscInt    ierr,i,vertex_size;
2854   PetscViewer viewer=pcbddc->dbg_viewer;
2855 
2856   PetscFunctionBegin;
2857   /* Reset previously computed graph */
2858   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2859   /* Init local Graph struct */
2860   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2861 
2862   /* Check validity of the csr graph passed in by the user */
2863   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2864     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2865   }
2866 
2867   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2868   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2869     PetscInt *xadj,*adjncy;
2870     PetscInt nvtxs;
2871 
2872     if (pcbddc->use_local_adj) {
2873       PetscBool flg_row=PETSC_FALSE;
2874 
2875       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2876       if (flg_row) {
2877         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2878         pcbddc->computed_rowadj = PETSC_TRUE;
2879       }
2880       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2881     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2882       IS                     is_dummy;
2883       ISLocalToGlobalMapping l2gmap_dummy;
2884       PetscInt               j,sum;
2885       PetscInt               *cxadj,*cadjncy;
2886       const PetscInt         *idxs;
2887       PCBDDCGraph            graph;
2888       PetscBT                is_on_boundary;
2889 
2890       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2891       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2892       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2893       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2894       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2895       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2896       graph->xadj = xadj;
2897       graph->adjncy = adjncy;
2898       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2899       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2900 
2901       if (pcbddc->dbg_flag) {
2902         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2903         for (i=0;i<graph->ncc;i++) {
2904           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2905         }
2906         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2907       }
2908 
2909       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2910       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2911       for (i=0;i<pcis->n_B;i++) {
2912         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2913       }
2914       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2915 
2916       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2917       sum = 0;
2918       for (i=0;i<graph->ncc;i++) {
2919         PetscInt sizecc = 0;
2920         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2921           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2922             sizecc++;
2923           }
2924         }
2925         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2926           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2927             cxadj[graph->queue[j]] = sizecc;
2928           }
2929         }
2930         sum += sizecc*sizecc;
2931       }
2932       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2933       sum = 0;
2934       for (i=0;i<nvtxs;i++) {
2935         PetscInt temp = cxadj[i];
2936         cxadj[i] = sum;
2937         sum += temp;
2938       }
2939       cxadj[nvtxs] = sum;
2940       for (i=0;i<graph->ncc;i++) {
2941         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2942           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2943             PetscInt k,sizecc = 0;
2944             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2945               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2946                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2947                 sizecc++;
2948               }
2949             }
2950           }
2951         }
2952       }
2953       if (nvtxs) {
2954         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2955       } else {
2956         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2957         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2958       }
2959       graph->xadj = 0;
2960       graph->adjncy = 0;
2961       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2962       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2963     }
2964   }
2965 
2966   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2967   vertex_size = 1;
2968   if (pcbddc->user_provided_isfordofs) {
2969     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2970       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2971       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2972         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2973         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2974       }
2975       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2976       pcbddc->n_ISForDofs = 0;
2977       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2978     }
2979     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2980     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2981   } else {
2982     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2983       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2984       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2985       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2986         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2987       }
2988     }
2989   }
2990 
2991   /* Setup of Graph */
2992   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2993     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2994   }
2995   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2996     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2997   }
2998   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2999 
3000   /* Graph's connected components analysis */
3001   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3002 
3003   /* print some info to stdout */
3004   if (pcbddc->dbg_flag) {
3005     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3006   }
3007 
3008   /* mark topography has done */
3009   pcbddc->recompute_topography = PETSC_FALSE;
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3015 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3016 {
3017   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3018   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3019   PetscErrorCode ierr;
3020 
3021   PetscFunctionBegin;
3022   n = 0;
3023   vertices = 0;
3024   if (pcbddc->ConstraintMatrix) {
3025     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3026     for (i=0;i<local_primal_size;i++) {
3027       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3028       if (size_of_constraint == 1) n++;
3029       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3030     }
3031     if (vertices_idx) {
3032       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3033       n = 0;
3034       for (i=0;i<local_primal_size;i++) {
3035         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3036         if (size_of_constraint == 1) {
3037           vertices[n++]=row_cmat_indices[0];
3038         }
3039         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3040       }
3041     }
3042   }
3043   *n_vertices = n;
3044   if (vertices_idx) *vertices_idx = vertices;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3050 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3051 {
3052   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3053   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3054   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3055   PetscBT        touched;
3056   PetscErrorCode ierr;
3057 
3058     /* This function assumes that the number of local constraints per connected component
3059        is not greater than the number of nodes defined for the connected component
3060        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3061   PetscFunctionBegin;
3062   n = 0;
3063   constraints_index = 0;
3064   if (pcbddc->ConstraintMatrix) {
3065     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3066     max_size_of_constraint = 0;
3067     for (i=0;i<local_primal_size;i++) {
3068       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3069       if (size_of_constraint > 1) {
3070         n++;
3071       }
3072       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3073       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3074     }
3075     if (constraints_idx) {
3076       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3077       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3078       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3079       n = 0;
3080       for (i=0;i<local_primal_size;i++) {
3081         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3082         if (size_of_constraint > 1) {
3083           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3084           /* find first untouched local node */
3085           j = 0;
3086           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3087           min_index = row_cmat_global_indices[j];
3088           min_loc = j;
3089           /* search the minimum among nodes not yet touched on the connected component
3090              since there can be more than one constraint on a single cc */
3091           for (j=1;j<size_of_constraint;j++) {
3092             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3093               min_index = row_cmat_global_indices[j];
3094               min_loc = j;
3095             }
3096           }
3097           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3098           constraints_index[n++] = row_cmat_indices[min_loc];
3099         }
3100         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3101       }
3102       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3103       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3104     }
3105   }
3106   *n_constraints = n;
3107   if (constraints_idx) *constraints_idx = constraints_index;
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 #undef __FUNCT__
3112 #define __FUNCT__ "PCBDDCSubsetNumbering"
3113 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3114 {
3115   Vec            local_vec,global_vec;
3116   IS             seqis,paris;
3117   VecScatter     scatter_ctx;
3118   PetscScalar    *array;
3119   PetscInt       *temp_global_dofs;
3120   PetscScalar    globalsum;
3121   PetscInt       i,j,s;
3122   PetscInt       nlocals,first_index,old_index,max_local;
3123   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3124   PetscMPIInt    *dof_sizes,*dof_displs;
3125   PetscBool      first_found;
3126   PetscErrorCode ierr;
3127 
3128   PetscFunctionBegin;
3129   /* mpi buffers */
3130   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3131   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3132   j = ( !rank_prec_comm ? size_prec_comm : 0);
3133   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3134   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3135   /* get maximum size of subset */
3136   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3137   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3138   max_local = 0;
3139   for (i=0;i<n_local_dofs;i++) {
3140     if (max_local < temp_global_dofs[i] ) {
3141       max_local = temp_global_dofs[i];
3142     }
3143   }
3144   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3145   max_global++;
3146   max_local = 0;
3147   for (i=0;i<n_local_dofs;i++) {
3148     if (max_local < local_dofs[i] ) {
3149       max_local = local_dofs[i];
3150     }
3151   }
3152   max_local++;
3153   /* allocate workspace */
3154   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3155   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3156   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3157   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3158   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3159   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3160   /* create scatter */
3161   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3162   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3163   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3164   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3165   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3166   /* init array */
3167   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3168   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3169   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3170   if (local_dofs_mult) {
3171     for (i=0;i<n_local_dofs;i++) {
3172       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3173     }
3174   } else {
3175     for (i=0;i<n_local_dofs;i++) {
3176       array[local_dofs[i]]=1.0;
3177     }
3178   }
3179   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3180   /* scatter into global vec and get total number of global dofs */
3181   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3182   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3183   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3184   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3185   /* Fill global_vec with cumulative function for global numbering */
3186   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3187   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3188   nlocals = 0;
3189   first_index = -1;
3190   first_found = PETSC_FALSE;
3191   for (i=0;i<s;i++) {
3192     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3193       first_found = PETSC_TRUE;
3194       first_index = i;
3195     }
3196     nlocals += (PetscInt)PetscRealPart(array[i]);
3197   }
3198   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3199   if (!rank_prec_comm) {
3200     dof_displs[0]=0;
3201     for (i=1;i<size_prec_comm;i++) {
3202       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3203     }
3204   }
3205   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3206   if (first_found) {
3207     array[first_index] += (PetscScalar)nlocals;
3208     old_index = first_index;
3209     for (i=first_index+1;i<s;i++) {
3210       if (PetscRealPart(array[i]) > 0.1) {
3211         array[i] += array[old_index];
3212         old_index = i;
3213       }
3214     }
3215   }
3216   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3217   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3218   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3219   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3220   /* get global ordering of local dofs */
3221   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3222   if (local_dofs_mult) {
3223     for (i=0;i<n_local_dofs;i++) {
3224       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3225     }
3226   } else {
3227     for (i=0;i<n_local_dofs;i++) {
3228       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3229     }
3230   }
3231   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3232   /* free workspace */
3233   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3234   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3235   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3236   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3237   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3238   /* return pointer to global ordering of local dofs */
3239   *global_numbering_subset = temp_global_dofs;
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3245 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3246 {
3247   PetscInt       i,j;
3248   PetscScalar    *alphas;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   /* this implements stabilized Gram-Schmidt */
3253   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3254   for (i=0;i<n;i++) {
3255     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3256     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3257     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3258   }
3259   ierr = PetscFree(alphas);CHKERRQ(ierr);
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatISGetSubassemblingPattern"
3265 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3266 {
3267   Mat             subdomain_adj;
3268   IS              new_ranks,ranks_send_to;
3269   MatPartitioning partitioner;
3270   Mat_IS          *matis;
3271   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3272   PetscInt        prank;
3273   PetscMPIInt     size,rank,color;
3274   PetscInt        *xadj,*adjncy,*oldranks;
3275   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3276   PetscInt        i,local_size,threshold=0;
3277   PetscErrorCode  ierr;
3278   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3279   PetscSubcomm    subcomm;
3280 
3281   PetscFunctionBegin;
3282   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3283   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3284   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3285 
3286   /* Get info on mapping */
3287   matis = (Mat_IS*)(mat->data);
3288   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3289   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3290 
3291   /* build local CSR graph of subdomains' connectivity */
3292   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3293   xadj[0] = 0;
3294   xadj[1] = PetscMax(n_neighs-1,0);
3295   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3296   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3297 
3298   if (threshold) {
3299     PetscInt xadj_count = 0;
3300     for (i=1;i<n_neighs;i++) {
3301       if (n_shared[i] > threshold) {
3302         adjncy[xadj_count] = neighs[i];
3303         adjncy_wgt[xadj_count] = n_shared[i];
3304         xadj_count++;
3305       }
3306     }
3307     xadj[1] = xadj_count;
3308   } else {
3309     if (xadj[1]) {
3310       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3311       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3312     }
3313   }
3314   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3315   if (use_square) {
3316     for (i=0;i<xadj[1];i++) {
3317       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3318     }
3319   }
3320   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3321 
3322   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3323 
3324   /*
3325     Restrict work on active processes only.
3326   */
3327   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3328   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3329   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3330   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3331   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3332   if (color) {
3333     ierr = PetscFree(xadj);CHKERRQ(ierr);
3334     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3335     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3336   } else {
3337     PetscInt coarsening_ratio;
3338     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3339     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3340     prank = rank;
3341     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3342     /*
3343     for (i=0;i<size;i++) {
3344       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3345     }
3346     */
3347     for (i=0;i<xadj[1];i++) {
3348       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3349     }
3350     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3351     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3352     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3353 
3354     /* Partition */
3355     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3356     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3357     if (use_vwgt) {
3358       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3359       v_wgt[0] = local_size;
3360       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3361     }
3362     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3363     coarsening_ratio = size/n_subdomains;
3364     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3365     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3366     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3367     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3368 
3369     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3370     if (contiguous) {
3371       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3372     } else {
3373       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3374     }
3375     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3376     /* clean up */
3377     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3378     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3379     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3380     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3381   }
3382   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3383 
3384   /* assemble parallel IS for sends */
3385   i = 1;
3386   if (color) i=0;
3387   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3388 
3389   /* get back IS */
3390   *is_sends = ranks_send_to;
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3395 
3396 #undef __FUNCT__
3397 #define __FUNCT__ "MatISSubassemble"
3398 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3399 {
3400   Mat                    local_mat;
3401   Mat_IS                 *matis;
3402   IS                     is_sends_internal;
3403   PetscInt               rows,cols,new_local_rows;
3404   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3405   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3406   ISLocalToGlobalMapping l2gmap;
3407   PetscInt*              l2gmap_indices;
3408   const PetscInt*        is_indices;
3409   MatType                new_local_type;
3410   /* buffers */
3411   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3412   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3413   PetscInt               *recv_buffer_idxs_local;
3414   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3415   /* MPI */
3416   MPI_Comm               comm,comm_n;
3417   PetscSubcomm           subcomm;
3418   PetscMPIInt            n_sends,n_recvs,commsize;
3419   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3420   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3421   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3422   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3423   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3424   PetscErrorCode         ierr;
3425 
3426   PetscFunctionBegin;
3427   /* TODO: add missing checks */
3428   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3429   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3430   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3431   PetscValidLogicalCollectiveInt(mat,nis,7);
3432   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3433   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3434   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3435   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3436   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3437   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3438   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3439   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3440     PetscInt mrows,mcols,mnrows,mncols;
3441     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3442     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3443     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3444     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3445     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3446     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3447   }
3448   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3449   PetscValidLogicalCollectiveInt(mat,bs,0);
3450   /* prepare IS for sending if not provided */
3451   if (!is_sends) {
3452     PetscBool pcontig = PETSC_TRUE;
3453     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3454     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3455   } else {
3456     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3457     is_sends_internal = is_sends;
3458   }
3459 
3460   /* get pointer of MATIS data */
3461   matis = (Mat_IS*)mat->data;
3462 
3463   /* get comm */
3464   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3465 
3466   /* compute number of sends */
3467   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3468   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3469 
3470   /* compute number of receives */
3471   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3472   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3473   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3474   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3475   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3476   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3477   ierr = PetscFree(iflags);CHKERRQ(ierr);
3478 
3479   /* restrict comm if requested */
3480   subcomm = 0;
3481   destroy_mat = PETSC_FALSE;
3482   if (restrict_comm) {
3483     PetscMPIInt color,subcommsize;
3484 
3485     color = 0;
3486     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3487     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3488     subcommsize = commsize - subcommsize;
3489     /* check if reuse has been requested */
3490     if (reuse == MAT_REUSE_MATRIX) {
3491       if (*mat_n) {
3492         PetscMPIInt subcommsize2;
3493         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3494         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3495         comm_n = PetscObjectComm((PetscObject)*mat_n);
3496       } else {
3497         comm_n = PETSC_COMM_SELF;
3498       }
3499     } else { /* MAT_INITIAL_MATRIX */
3500       PetscMPIInt rank;
3501 
3502       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3503       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3504       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3505       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3506       comm_n = PetscSubcommChild(subcomm);
3507     }
3508     /* flag to destroy *mat_n if not significative */
3509     if (color) destroy_mat = PETSC_TRUE;
3510   } else {
3511     comm_n = comm;
3512   }
3513 
3514   /* prepare send/receive buffers */
3515   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3516   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3517   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3518   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3519   if (nis) {
3520     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3521   }
3522 
3523   /* Get data from local matrices */
3524   if (!isdense) {
3525     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3526     /* TODO: See below some guidelines on how to prepare the local buffers */
3527     /*
3528        send_buffer_vals should contain the raw values of the local matrix
3529        send_buffer_idxs should contain:
3530        - MatType_PRIVATE type
3531        - PetscInt        size_of_l2gmap
3532        - PetscInt        global_row_indices[size_of_l2gmap]
3533        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3534     */
3535   } else {
3536     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3537     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3538     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3539     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3540     send_buffer_idxs[1] = i;
3541     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3542     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3543     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3544     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3545     for (i=0;i<n_sends;i++) {
3546       ilengths_vals[is_indices[i]] = len*len;
3547       ilengths_idxs[is_indices[i]] = len+2;
3548     }
3549   }
3550   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3551   /* additional is (if any) */
3552   if (nis) {
3553     PetscMPIInt psum;
3554     PetscInt j;
3555     for (j=0,psum=0;j<nis;j++) {
3556       PetscInt plen;
3557       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3558       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3559       psum += len+1; /* indices + lenght */
3560     }
3561     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3562     for (j=0,psum=0;j<nis;j++) {
3563       PetscInt plen;
3564       const PetscInt *is_array_idxs;
3565       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3566       send_buffer_idxs_is[psum] = plen;
3567       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3568       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3569       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3570       psum += plen+1; /* indices + lenght */
3571     }
3572     for (i=0;i<n_sends;i++) {
3573       ilengths_idxs_is[is_indices[i]] = psum;
3574     }
3575     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3576   }
3577 
3578   buf_size_idxs = 0;
3579   buf_size_vals = 0;
3580   buf_size_idxs_is = 0;
3581   for (i=0;i<n_recvs;i++) {
3582     buf_size_idxs += (PetscInt)olengths_idxs[i];
3583     buf_size_vals += (PetscInt)olengths_vals[i];
3584     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3585   }
3586   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3587   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3588   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3589 
3590   /* get new tags for clean communications */
3591   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3592   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3593   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3594 
3595   /* allocate for requests */
3596   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3597   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3598   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3599   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3600   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3601   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3602 
3603   /* communications */
3604   ptr_idxs = recv_buffer_idxs;
3605   ptr_vals = recv_buffer_vals;
3606   ptr_idxs_is = recv_buffer_idxs_is;
3607   for (i=0;i<n_recvs;i++) {
3608     source_dest = onodes[i];
3609     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3610     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3611     ptr_idxs += olengths_idxs[i];
3612     ptr_vals += olengths_vals[i];
3613     if (nis) {
3614       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3615       ptr_idxs_is += olengths_idxs_is[i];
3616     }
3617   }
3618   for (i=0;i<n_sends;i++) {
3619     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3620     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3621     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3622     if (nis) {
3623       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3624     }
3625   }
3626   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3627   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3628 
3629   /* assemble new l2g map */
3630   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3631   ptr_idxs = recv_buffer_idxs;
3632   new_local_rows = 0;
3633   for (i=0;i<n_recvs;i++) {
3634     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3635     ptr_idxs += olengths_idxs[i];
3636   }
3637   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3638   ptr_idxs = recv_buffer_idxs;
3639   new_local_rows = 0;
3640   for (i=0;i<n_recvs;i++) {
3641     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3642     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3643     ptr_idxs += olengths_idxs[i];
3644   }
3645   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3646   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3647   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3648 
3649   /* infer new local matrix type from received local matrices type */
3650   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3651   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3652   if (n_recvs) {
3653     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3654     ptr_idxs = recv_buffer_idxs;
3655     for (i=0;i<n_recvs;i++) {
3656       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3657         new_local_type_private = MATAIJ_PRIVATE;
3658         break;
3659       }
3660       ptr_idxs += olengths_idxs[i];
3661     }
3662     switch (new_local_type_private) {
3663       case MATDENSE_PRIVATE:
3664         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3665           new_local_type = MATSEQAIJ;
3666           bs = 1;
3667         } else { /* if I receive only 1 dense matrix */
3668           new_local_type = MATSEQDENSE;
3669           bs = 1;
3670         }
3671         break;
3672       case MATAIJ_PRIVATE:
3673         new_local_type = MATSEQAIJ;
3674         bs = 1;
3675         break;
3676       case MATBAIJ_PRIVATE:
3677         new_local_type = MATSEQBAIJ;
3678         break;
3679       case MATSBAIJ_PRIVATE:
3680         new_local_type = MATSEQSBAIJ;
3681         break;
3682       default:
3683         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3684         break;
3685     }
3686   } else { /* by default, new_local_type is seqdense */
3687     new_local_type = MATSEQDENSE;
3688     bs = 1;
3689   }
3690 
3691   /* create MATIS object if needed */
3692   if (reuse == MAT_INITIAL_MATRIX) {
3693     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3694     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3695   } else {
3696     /* it also destroys the local matrices */
3697     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3698   }
3699   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3700   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3701 
3702   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3703 
3704   /* Global to local map of received indices */
3705   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3706   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3707   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3708 
3709   /* restore attributes -> type of incoming data and its size */
3710   buf_size_idxs = 0;
3711   for (i=0;i<n_recvs;i++) {
3712     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3713     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3714     buf_size_idxs += (PetscInt)olengths_idxs[i];
3715   }
3716   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3717 
3718   /* set preallocation */
3719   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3720   if (!newisdense) {
3721     PetscInt *new_local_nnz=0;
3722 
3723     ptr_vals = recv_buffer_vals;
3724     ptr_idxs = recv_buffer_idxs_local;
3725     if (n_recvs) {
3726       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3727     }
3728     for (i=0;i<n_recvs;i++) {
3729       PetscInt j;
3730       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3731         for (j=0;j<*(ptr_idxs+1);j++) {
3732           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3733         }
3734       } else {
3735         /* TODO */
3736       }
3737       ptr_idxs += olengths_idxs[i];
3738     }
3739     if (new_local_nnz) {
3740       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3741       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3742       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3743       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3744       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3745       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3746     } else {
3747       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3748     }
3749     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3750   } else {
3751     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3752   }
3753 
3754   /* set values */
3755   ptr_vals = recv_buffer_vals;
3756   ptr_idxs = recv_buffer_idxs_local;
3757   for (i=0;i<n_recvs;i++) {
3758     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3759       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3760       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3761       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3762       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3763       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3764     } else {
3765       /* TODO */
3766     }
3767     ptr_idxs += olengths_idxs[i];
3768     ptr_vals += olengths_vals[i];
3769   }
3770   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3773   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3774   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3775   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3776 
3777 #if 0
3778   if (!restrict_comm) { /* check */
3779     Vec       lvec,rvec;
3780     PetscReal infty_error;
3781 
3782     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3783     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3784     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3785     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3786     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3787     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3788     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3789     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3790     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3791   }
3792 #endif
3793 
3794   /* assemble new additional is (if any) */
3795   if (nis) {
3796     PetscInt **temp_idxs,*count_is,j,psum;
3797 
3798     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3799     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3800     ptr_idxs = recv_buffer_idxs_is;
3801     psum = 0;
3802     for (i=0;i<n_recvs;i++) {
3803       for (j=0;j<nis;j++) {
3804         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3805         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3806         psum += plen;
3807         ptr_idxs += plen+1; /* shift pointer to received data */
3808       }
3809     }
3810     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3811     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3812     for (i=1;i<nis;i++) {
3813       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3814     }
3815     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3816     ptr_idxs = recv_buffer_idxs_is;
3817     for (i=0;i<n_recvs;i++) {
3818       for (j=0;j<nis;j++) {
3819         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3820         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3821         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3822         ptr_idxs += plen+1; /* shift pointer to received data */
3823       }
3824     }
3825     for (i=0;i<nis;i++) {
3826       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3827       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3828       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3829     }
3830     ierr = PetscFree(count_is);CHKERRQ(ierr);
3831     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3832     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3833   }
3834   /* free workspace */
3835   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3836   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3837   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3838   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3839   if (isdense) {
3840     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3841     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3842   } else {
3843     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3844   }
3845   if (nis) {
3846     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3847     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3848   }
3849   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3850   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3851   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3852   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3853   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3854   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3855   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3856   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3857   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3858   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3859   ierr = PetscFree(onodes);CHKERRQ(ierr);
3860   if (nis) {
3861     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3862     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3863     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3864   }
3865   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3866   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3867     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3868     for (i=0;i<nis;i++) {
3869       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3870     }
3871   }
3872   PetscFunctionReturn(0);
3873 }
3874 
3875 /* temporary hack into ksp private data structure */
3876 #include <petsc-private/kspimpl.h>
3877 
3878 #undef __FUNCT__
3879 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3880 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3881 {
3882   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3883   PC_IS                  *pcis = (PC_IS*)pc->data;
3884   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3885   MatNullSpace           CoarseNullSpace=NULL;
3886   ISLocalToGlobalMapping coarse_islg;
3887   IS                     coarse_is,*isarray;
3888   PetscInt               i,im_active=-1,active_procs=-1;
3889   PetscInt               nis,nisdofs,nisneu;
3890   PC                     pc_temp;
3891   PCType                 coarse_pc_type;
3892   KSPType                coarse_ksp_type;
3893   PetscBool              multilevel_requested,multilevel_allowed;
3894   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3895   Mat                    t_coarse_mat_is;
3896   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3897   PetscMPIInt            all_procs;
3898   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3899   PetscBool              compute_vecs = PETSC_FALSE;
3900   PetscScalar            *array;
3901   PetscErrorCode         ierr;
3902 
3903   PetscFunctionBegin;
3904   /* Assign global numbering to coarse dofs */
3905   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3906     PetscInt ocoarse_size;
3907     compute_vecs = PETSC_TRUE;
3908     ocoarse_size = pcbddc->coarse_size;
3909     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3910     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3911     /* see if we can avoid some work */
3912     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3913       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3914         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3915         coarse_reuse = PETSC_FALSE;
3916       } else { /* we can safely reuse already computed coarse matrix */
3917         coarse_reuse = PETSC_TRUE;
3918       }
3919     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3920       coarse_reuse = PETSC_FALSE;
3921     }
3922     /* reset any subassembling information */
3923     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3924     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3925   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3926     coarse_reuse = PETSC_TRUE;
3927   }
3928 
3929   /* count "active" (i.e. with positive local size) and "void" processes */
3930   im_active = !!(pcis->n);
3931   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3932   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3933   void_procs = all_procs-active_procs;
3934   csin_type_simple = PETSC_TRUE;
3935   redist = PETSC_FALSE;
3936   if (pcbddc->current_level && void_procs) {
3937     csin_ml = PETSC_TRUE;
3938     ncoarse_ml = void_procs;
3939     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3940     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3941       csin_ds = PETSC_TRUE;
3942       ncoarse_ds = pcbddc->redistribute_coarse;
3943       redist = PETSC_TRUE;
3944     } else {
3945       csin_ds = PETSC_TRUE;
3946       ncoarse_ds = active_procs;
3947       redist = PETSC_TRUE;
3948     }
3949   } else {
3950     csin_ml = PETSC_FALSE;
3951     ncoarse_ml = all_procs;
3952     if (void_procs) {
3953       csin_ds = PETSC_TRUE;
3954       ncoarse_ds = void_procs;
3955       csin_type_simple = PETSC_FALSE;
3956     } else {
3957       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3958         csin_ds = PETSC_TRUE;
3959         ncoarse_ds = pcbddc->redistribute_coarse;
3960         redist = PETSC_TRUE;
3961       } else {
3962         csin_ds = PETSC_FALSE;
3963         ncoarse_ds = all_procs;
3964       }
3965     }
3966   }
3967 
3968   /*
3969     test if we can go multilevel: three conditions must be satisfied:
3970     - we have not exceeded the number of levels requested
3971     - we can actually subassemble the active processes
3972     - we can find a suitable number of MPI processes where we can place the subassembled problem
3973   */
3974   multilevel_allowed = PETSC_FALSE;
3975   multilevel_requested = PETSC_FALSE;
3976   if (pcbddc->current_level < pcbddc->max_levels) {
3977     multilevel_requested = PETSC_TRUE;
3978     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3979       multilevel_allowed = PETSC_FALSE;
3980     } else {
3981       multilevel_allowed = PETSC_TRUE;
3982     }
3983   }
3984   /* determine number of process partecipating to coarse solver */
3985   if (multilevel_allowed) {
3986     ncoarse = ncoarse_ml;
3987     csin = csin_ml;
3988     redist = PETSC_FALSE;
3989   } else {
3990     ncoarse = ncoarse_ds;
3991     csin = csin_ds;
3992   }
3993 
3994   /* creates temporary l2gmap and IS for coarse indexes */
3995   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3996   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3997 
3998   /* creates temporary MATIS object for coarse matrix */
3999   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4000   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4001   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4002   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4003 #if 0
4004   {
4005     PetscViewer viewer;
4006     char filename[256];
4007     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4008     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4009     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4010     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4011     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4012   }
4013 #endif
4014   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4015   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4016   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4017   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4018   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4019 
4020   /* compute dofs splitting and neumann boundaries for coarse dofs */
4021   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4022     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4023     const PetscInt         *idxs;
4024     ISLocalToGlobalMapping tmap;
4025 
4026     /* create map between primal indices (in local representative ordering) and local primal numbering */
4027     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4028     /* allocate space for temporary storage */
4029     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4030     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4031     /* allocate for IS array */
4032     nisdofs = pcbddc->n_ISForDofsLocal;
4033     nisneu = !!pcbddc->NeumannBoundariesLocal;
4034     nis = nisdofs + nisneu;
4035     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4036     /* dofs splitting */
4037     for (i=0;i<nisdofs;i++) {
4038       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4039       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4040       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4041       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4042       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4043       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4044       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4045       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4046     }
4047     /* neumann boundaries */
4048     if (pcbddc->NeumannBoundariesLocal) {
4049       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4050       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4051       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4052       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4053       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4054       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4055       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4056       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4057     }
4058     /* free memory */
4059     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4060     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4061     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4062   } else {
4063     nis = 0;
4064     nisdofs = 0;
4065     nisneu = 0;
4066     isarray = NULL;
4067   }
4068   /* destroy no longer needed map */
4069   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4070 
4071   /* restrict on coarse candidates (if needed) */
4072   coarse_mat_is = NULL;
4073   if (csin) {
4074     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4075       if (redist) {
4076         PetscMPIInt rank;
4077         PetscInt    spc,n_spc_p1,dest[1],destsize;
4078 
4079         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4080         spc = active_procs/ncoarse;
4081         n_spc_p1 = active_procs%ncoarse;
4082         if (im_active) {
4083           destsize = 1;
4084           if (rank > n_spc_p1*(spc+1)-1) {
4085             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4086           } else {
4087             dest[0] = rank/(spc+1);
4088           }
4089         } else {
4090           destsize = 0;
4091         }
4092         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4093       } else if (csin_type_simple) {
4094         PetscMPIInt rank;
4095         PetscInt    issize,isidx;
4096 
4097         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4098         if (im_active) {
4099           issize = 1;
4100           isidx = (PetscInt)rank;
4101         } else {
4102           issize = 0;
4103           isidx = -1;
4104         }
4105         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4106       } else { /* get a suitable subassembling pattern from MATIS code */
4107         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4108       }
4109 
4110       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4111       if (!redist || ncoarse <= void_procs) {
4112         PetscInt ncoarse_cand,tissize,*nisindices;
4113         PetscInt *coarse_candidates;
4114         const PetscInt* tisindices;
4115 
4116         /* get coarse candidates' ranks in pc communicator */
4117         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4118         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4119         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4120           if (!coarse_candidates[i]) {
4121             coarse_candidates[ncoarse_cand++]=i;
4122           }
4123         }
4124         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4125 
4126 
4127         if (pcbddc->dbg_flag) {
4128           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4129           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4130           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4131           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4132           for (i=0;i<ncoarse_cand;i++) {
4133             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4134           }
4135           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4136           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4137         }
4138         /* shift the pattern on coarse candidates */
4139         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4140         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4141         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4142         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4143         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4144         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4145         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4146       }
4147       if (pcbddc->dbg_flag) {
4148         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4149         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4150         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4151         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4152       }
4153     }
4154     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4155     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4156   } else {
4157     if (pcbddc->dbg_flag) {
4158       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4159       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4160       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4161     }
4162     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4163     coarse_mat_is = t_coarse_mat_is;
4164   }
4165 
4166   /* create local to global scatters for coarse problem */
4167   if (compute_vecs) {
4168     PetscInt lrows;
4169     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4170     if (coarse_mat_is) {
4171       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4172     } else {
4173       lrows = 0;
4174     }
4175     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4176     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4177     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4178     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4179     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4180   }
4181   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4182   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4183 
4184   /* set defaults for coarse KSP and PC */
4185   if (multilevel_allowed) {
4186     coarse_ksp_type = KSPRICHARDSON;
4187     coarse_pc_type = PCBDDC;
4188   } else {
4189     coarse_ksp_type = KSPPREONLY;
4190     coarse_pc_type = PCREDUNDANT;
4191   }
4192 
4193   /* print some info if requested */
4194   if (pcbddc->dbg_flag) {
4195     if (!multilevel_allowed) {
4196       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4197       if (multilevel_requested) {
4198         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4199       } else if (pcbddc->max_levels) {
4200         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4201       }
4202       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4203     }
4204   }
4205 
4206   /* create the coarse KSP object only once with defaults */
4207   if (coarse_mat_is) {
4208     MatReuse coarse_mat_reuse;
4209     PetscViewer dbg_viewer = NULL;
4210     if (pcbddc->dbg_flag) {
4211       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4212       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4213     }
4214     if (!pcbddc->coarse_ksp) {
4215       char prefix[256],str_level[16];
4216       size_t len;
4217       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4218       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4219       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4220       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4221       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4222       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4223       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4224       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4225       /* prefix */
4226       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4227       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4228       if (!pcbddc->current_level) {
4229         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4230         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4231       } else {
4232         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4233         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4234         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4235         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4236         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4237         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4238       }
4239       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4240       /* allow user customization */
4241       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4242       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4243     }
4244 
4245     /* get some info after set from options */
4246     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4247     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4248     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4249     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4250     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4251       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4252       isbddc = PETSC_FALSE;
4253     }
4254     if (isredundant) {
4255       KSP inner_ksp;
4256       PC inner_pc;
4257       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4258       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4259       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4260     }
4261 
4262     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4263     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4264     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4265     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4266     if (nisdofs) {
4267       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4268       for (i=0;i<nisdofs;i++) {
4269         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4270       }
4271     }
4272     if (nisneu) {
4273       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4274       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4275     }
4276 
4277     /* assemble coarse matrix */
4278     if (coarse_reuse) {
4279       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4280       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4281       coarse_mat_reuse = MAT_REUSE_MATRIX;
4282     } else {
4283       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4284     }
4285     if (isbddc || isnn) {
4286       if (pcbddc->coarsening_ratio > 1) {
4287         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4288           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4289           if (pcbddc->dbg_flag) {
4290             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4291             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4292             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4293             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4294           }
4295         }
4296         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4297       } else {
4298         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4299         coarse_mat = coarse_mat_is;
4300       }
4301     } else {
4302       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4303     }
4304     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4305 
4306     /* propagate symmetry info to coarse matrix */
4307     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4308     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4309 
4310     /* set operators */
4311     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4312     if (pcbddc->dbg_flag) {
4313       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4314     }
4315   } else { /* processes non partecipating to coarse solver (if any) */
4316     coarse_mat = 0;
4317   }
4318   ierr = PetscFree(isarray);CHKERRQ(ierr);
4319 #if 0
4320   {
4321     PetscViewer viewer;
4322     char filename[256];
4323     sprintf(filename,"coarse_mat.m");
4324     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4325     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4326     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4327     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4328   }
4329 #endif
4330 
4331   /* Compute coarse null space (special handling by BDDC only) */
4332   if (pcbddc->NullSpace) {
4333     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4334   }
4335 
4336   if (pcbddc->coarse_ksp) {
4337     Vec crhs,csol;
4338     PetscBool ispreonly;
4339     if (CoarseNullSpace) {
4340       if (isbddc) {
4341         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4342       } else {
4343         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4344       }
4345     }
4346     /* setup coarse ksp */
4347     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4348     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4349     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4350     /* hack */
4351     if (!csol) {
4352       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4353     }
4354     if (!crhs) {
4355       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4356     }
4357     /* Check coarse problem if in debug mode or if solving with an iterative method */
4358     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4359     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4360       KSP       check_ksp;
4361       KSPType   check_ksp_type;
4362       PC        check_pc;
4363       Vec       check_vec,coarse_vec;
4364       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4365       PetscInt  its;
4366       PetscBool compute_eigs;
4367       PetscReal *eigs_r,*eigs_c;
4368       PetscInt  neigs;
4369       const char *prefix;
4370 
4371       /* Create ksp object suitable for estimation of extreme eigenvalues */
4372       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4373       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4374       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4375       if (ispreonly) {
4376         check_ksp_type = KSPPREONLY;
4377         compute_eigs = PETSC_FALSE;
4378       } else {
4379         check_ksp_type = KSPGMRES;
4380         compute_eigs = PETSC_TRUE;
4381       }
4382       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4383       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4384       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4385       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4386       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4387       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4388       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4389       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4390       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4391       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4392       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4393       /* create random vec */
4394       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4395       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4396       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4397       if (CoarseNullSpace) {
4398         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4399       }
4400       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4401       /* solve coarse problem */
4402       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4403       if (CoarseNullSpace) {
4404         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4405       }
4406       /* set eigenvalue estimation if preonly has not been requested */
4407       if (compute_eigs) {
4408         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4409         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4410         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4411         lambda_max = eigs_r[neigs-1];
4412         lambda_min = eigs_r[0];
4413         if (pcbddc->use_coarse_estimates) {
4414           if (lambda_max>lambda_min) {
4415             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4416             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4417           }
4418         }
4419       }
4420 
4421       /* check coarse problem residual error */
4422       if (pcbddc->dbg_flag) {
4423         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4424         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4425         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4426         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4427         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4428         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4429         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4430         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4431         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4432         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4433         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4434         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4435         if (compute_eigs) {
4436           PetscReal lambda_max_s,lambda_min_s;
4437           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4438           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4439           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4440           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4441           for (i=0;i<neigs;i++) {
4442             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4443           }
4444         }
4445         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4446         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4447       }
4448       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4449       if (compute_eigs) {
4450         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4451         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4452       }
4453     }
4454   }
4455   /* print additional info */
4456   if (pcbddc->dbg_flag) {
4457     /* waits until all processes reaches this point */
4458     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4459     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4460     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4461   }
4462 
4463   /* free memory */
4464   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4465   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4466   PetscFunctionReturn(0);
4467 }
4468 
4469 #undef __FUNCT__
4470 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4471 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4472 {
4473   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4474   PC_IS*         pcis = (PC_IS*)pc->data;
4475   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4476   PetscInt       i,coarse_size;
4477   PetscInt       *local_primal_indices;
4478   PetscErrorCode ierr;
4479 
4480   PetscFunctionBegin;
4481   /* Compute global number of coarse dofs */
4482   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4483     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4484   }
4485   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4486 
4487   /* check numbering */
4488   if (pcbddc->dbg_flag) {
4489     PetscScalar coarsesum,*array;
4490     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4491 
4492     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4493     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4494     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4495     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4496     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4497     for (i=0;i<pcbddc->local_primal_size;i++) {
4498       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4499     }
4500     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4501     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4502     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4503     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4504     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4505     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4506     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4507     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4508     for (i=0;i<pcis->n;i++) {
4509       if (array[i] == 1.0) {
4510         set_error = PETSC_TRUE;
4511         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4512       }
4513     }
4514     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4515     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4516     for (i=0;i<pcis->n;i++) {
4517       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4518     }
4519     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4520     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4521     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4522     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4523     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4524     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4525     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4526       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4527       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4528       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4529       for (i=0;i<pcbddc->local_primal_size;i++) {
4530         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4531       }
4532       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4533     }
4534     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4535     if (set_error_reduced) {
4536       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4537     }
4538   }
4539   /* get back data */
4540   *coarse_size_n = coarse_size;
4541   *local_primal_indices_n = local_primal_indices;
4542   PetscFunctionReturn(0);
4543 }
4544 
4545 #undef __FUNCT__
4546 #define __FUNCT__ "PCBDDCGlobalToLocal"
4547 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4548 {
4549   IS             localis_t;
4550   PetscInt       i,lsize,*idxs,n;
4551   PetscScalar    *vals;
4552   PetscErrorCode ierr;
4553 
4554   PetscFunctionBegin;
4555   /* get indices in local ordering exploiting local to global map */
4556   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4557   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4558   for (i=0;i<lsize;i++) vals[i] = 1.0;
4559   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4560   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4561   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4562   if (idxs) { /* multilevel guard */
4563     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4564   }
4565   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4566   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4567   ierr = PetscFree(vals);CHKERRQ(ierr);
4568   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4569   /* now compute set in local ordering */
4570   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4571   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4572   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4573   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4574   for (i=0,lsize=0;i<n;i++) {
4575     if (PetscRealPart(vals[i]) > 0.5) {
4576       lsize++;
4577     }
4578   }
4579   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4580   for (i=0,lsize=0;i<n;i++) {
4581     if (PetscRealPart(vals[i]) > 0.5) {
4582       idxs[lsize++] = i;
4583     }
4584   }
4585   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4586   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4587   *localis = localis_t;
4588   PetscFunctionReturn(0);
4589 }
4590 
4591 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4592 #undef __FUNCT__
4593 #define __FUNCT__ "PCBDDCMatMult_Private"
4594 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4595 {
4596   PCBDDCChange_ctx change_ctx;
4597   PetscErrorCode   ierr;
4598 
4599   PetscFunctionBegin;
4600   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4601   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4602   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4603   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4604   PetscFunctionReturn(0);
4605 }
4606 
4607 #undef __FUNCT__
4608 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4609 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4610 {
4611   PCBDDCChange_ctx change_ctx;
4612   PetscErrorCode   ierr;
4613 
4614   PetscFunctionBegin;
4615   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4616   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4617   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4618   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4619   PetscFunctionReturn(0);
4620 }
4621 
4622 #undef __FUNCT__
4623 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4624 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4625 {
4626   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4627   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4628   PetscInt            *used_xadj,*used_adjncy;
4629   PetscBool           free_used_adj;
4630   PetscErrorCode      ierr;
4631 
4632   PetscFunctionBegin;
4633   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4634   free_used_adj = PETSC_FALSE;
4635   if (pcbddc->sub_schurs_layers == -1) {
4636     used_xadj = NULL;
4637     used_adjncy = NULL;
4638   } else {
4639     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4640       used_xadj = pcbddc->mat_graph->xadj;
4641       used_adjncy = pcbddc->mat_graph->adjncy;
4642     } else if (pcbddc->computed_rowadj) {
4643       used_xadj = pcbddc->mat_graph->xadj;
4644       used_adjncy = pcbddc->mat_graph->adjncy;
4645     } else {
4646       PetscBool      flg_row=PETSC_FALSE;
4647       const PetscInt *xadj,*adjncy;
4648       PetscInt       nvtxs;
4649 
4650       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4651       if (flg_row) {
4652         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4653         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4654         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4655         free_used_adj = PETSC_TRUE;
4656       } else {
4657         pcbddc->sub_schurs_layers = -1;
4658         used_xadj = NULL;
4659         used_adjncy = NULL;
4660       }
4661       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4662     }
4663   }
4664   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4665 
4666   /* free adjacency */
4667   if (free_used_adj) {
4668     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4669   }
4670   PetscFunctionReturn(0);
4671 }
4672 
4673 #undef __FUNCT__
4674 #define __FUNCT__ "PCBDDCInitSubSchurs"
4675 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4676 {
4677   PC_IS               *pcis=(PC_IS*)pc->data;
4678   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4679   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4680   PCBDDCGraph         graph;
4681   Mat                 S_j;
4682   PetscErrorCode      ierr;
4683 
4684   PetscFunctionBegin;
4685   /* attach interface graph for determining subsets */
4686   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4687     IS verticesIS;
4688 
4689     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4690     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4691     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4692     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4693     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4694     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4695 /*
4696     if (pcbddc->dbg_flag) {
4697       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4698     }
4699 */
4700   } else {
4701     graph = pcbddc->mat_graph;
4702   }
4703 
4704   /* Create Schur complement matrix */
4705   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4706   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4707 
4708   /* sub_schurs init */
4709   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4710   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4711   /* free graph struct */
4712   if (pcbddc->sub_schurs_rebuild) {
4713     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4714   }
4715   PetscFunctionReturn(0);
4716 }
4717