xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8bec7fa6a6a2cd30a71a71925a8685f07805244d)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
60   nmax = PetscMax(nmin,nmax);
61 
62   /* allocate lapack workspace */
63   cum = cum2 = 0;
64   maxneigs = 0;
65   for (i=0;i<sub_schurs->n_subs;i++) {
66     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
67       PetscInt n,subset_size;
68       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
69       n = PetscMin(subset_size,nmax);
70       cum += subset_size*n;
71       cum2 += n;
72       maxneigs = PetscMax(maxneigs,n);
73     }
74   }
75 
76   if (mss) {
77     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
78       PetscBLASInt B_itype = 1;
79       PetscBLASInt B_N = mss;
80       PetscReal    zero = 0.0;
81       PetscReal    eps = 0.0; /* dlamch? */
82 
83       B_lwork = -1;
84       S = NULL;
85       St = NULL;
86       eigs = NULL;
87       eigv = NULL;
88       B_iwork = NULL;
89       B_ifail = NULL;
90       thresh = 1.0;
91       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
92 #if defined(PETSC_USE_COMPLEX)
93       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
94 #else
95       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
96 #endif
97       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
98       ierr = PetscFPTrapPop();CHKERRQ(ierr);
99     } else {
100         /* TODO */
101     }
102   } else {
103     lwork = 0;
104   }
105 
106   nv = 0;
107   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
108     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
109   }
110   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
111   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
112                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
113 #if defined(PETSC_USE_COMPLEX)
114   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
115 #endif
116   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
117   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
118                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
119                       nv+cum,&pcbddc->adaptive_constraints_idxs,
120                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
121   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
122 
123   maxneigs = 0;
124   cum = cum2 = cumarray = 0;
125   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
126     const PetscInt *idxs;
127 
128     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
129     for (cum=0;cum<nv;cum++) {
130       pcbddc->adaptive_constraints_n[cum] = 1;
131       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
132       pcbddc->adaptive_constraints_ptrs[cum] = cum;
133       pcbddc->adaptive_constraints_data[cum] = 1.0;
134     }
135     cum2 = cum;
136     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
137   }
138 
139   if (mss) { /* multilevel */
140     if (pcbddc->use_deluxe_scaling) {
141       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
142     } else {
143       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
144     }
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
146   }
147 
148   for (i=0;i<sub_schurs->n_subs;i++) {
149     PetscInt j,subset_size;
150 
151     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
152     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
153       const PetscInt *idxs;
154       PetscScalar    one = 1.0,scalar_zero = 0.0;
155       PetscReal      zero=0.0;
156       PetscBLASInt   B_N;
157 
158       /* S should be copied since we need it for deluxe scaling */
159       if (sub_schurs->is_hermitian) {
160         PetscInt j;
161         for (j=0;j<subset_size;j++) {
162           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
163         }
164         for (j=0;j<subset_size;j++) {
165           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
166         }
167       } else {
168         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
169         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
170       }
171       /* is always this the right matrix? */
172       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
173 
174       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
175       /* St = Starray+cumarray; */
176       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
177       if (pcbddc->adaptive_threshold > 1.0) { /* this is an heuristic for edges */
178         thresh = (1.*pcbddc->mat_graph->count[idxs[0]])/(pcbddc->adaptive_threshold);
179       } else {
180         thresh = 1.0;
181       }
182 
183       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
184         PetscBLASInt B_itype = 1;
185         PetscBLASInt B_IL = 1, B_IU;
186         PetscReal    eps = -1.0; /* dlamch? */
187         PetscInt     nmin_s;
188 
189         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
190         /* ask for eigenvalues lower than thresh */
191         if (pcbddc->dbg_flag) {
192           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
193         }
194         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
195 #if defined(PETSC_USE_COMPLEX)
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
197 #else
198         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
199 #endif
200         ierr = PetscFPTrapPop();CHKERRQ(ierr);
201         if (B_ierr) {
202           if (B_ierr < 0 ) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
204           } else if (B_ierr <= B_N) {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
206           } else {
207             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
208           }
209         }
210 
211         if (B_neigs > nmax) {
212           if (pcbddc->dbg_flag) {
213             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
214           }
215           B_neigs = nmax;
216         }
217 
218         nmin_s = PetscMin(nmin,B_N);
219         if (B_neigs < nmin_s) {
220           PetscBLASInt B_neigs2;
221 
222           B_IL = B_neigs + 1;
223           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
224           if (pcbddc->dbg_flag) {
225             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
226           }
227           if (sub_schurs->is_hermitian) {
228             PetscInt j;
229             for (j=0;j<subset_size;j++) {
230               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
231             }
232             for (j=0;j<subset_size;j++) {
233               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
234             }
235           } else {
236             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
238           }
239           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
240 #if defined(PETSC_USE_COMPLEX)
241           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
242 #else
243           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
244 #endif
245           ierr = PetscFPTrapPop();CHKERRQ(ierr);
246           B_neigs += B_neigs2;
247         }
248         if (B_ierr) {
249           if (B_ierr < 0 ) {
250             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
251           } else if (B_ierr <= B_N) {
252             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
253           } else {
254             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
255           }
256         }
257         if (pcbddc->dbg_flag) {
258           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
259           for (j=0;j<B_neigs;j++) {
260             if (eigs[j] == 0.0) {
261               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
262             } else {
263               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",1.0/eigs[j]);
264             }
265           }
266         }
267       } else {
268           /* TODO */
269       }
270 
271       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
272       if (B_neigs) {
273         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&scalar_zero,Seigv,&B_N));
274       }
275       maxneigs = PetscMax(B_neigs,maxneigs);
276       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
277       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
278 
279       if (pcbddc->dbg_flag > 1) {
280         PetscInt ii;
281         for (ii=0;ii<B_neigs;ii++) {
282           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
283           for (j=0;j<B_N;j++) {
284             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e %1.4e\n",eigv[ii*B_N+j],Seigv[ii*B_N+j]);
285           }
286         }
287       }
288       for (j=0;j<B_neigs;j++) {
289 #if 0
290         {
291           PetscBLASInt Blas_N,Blas_one = 1.0;
292           PetscScalar norm;
293           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
294           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
295           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
296             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
297           } else {
298             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
299           }
300           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
301         }
302 #endif
303         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
304         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
305         cum2 += subset_size;
306       }
307       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
308     }
309     /* shift for next computation */
310     cumarray += subset_size*subset_size;
311   }
312   if (pcbddc->dbg_flag) {
313     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
314   }
315   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
316   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
317 
318   if (mss) {
319     if (pcbddc->use_deluxe_scaling) {
320       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
321     } else {
322       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
323     }
324     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
325   }
326   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
327 #if defined(PETSC_USE_COMPLEX)
328   ierr = PetscFree(rwork);CHKERRQ(ierr);
329 #endif
330   if (pcbddc->dbg_flag) {
331     PetscInt maxneigs_r;
332     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
333     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
334   }
335   PetscFunctionReturn(0);
336 }
337 
338 #undef __FUNCT__
339 #define __FUNCT__ "PCBDDCSetUpSolvers"
340 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
341 {
342   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
343   PetscScalar    *coarse_submat_vals;
344   PetscErrorCode ierr;
345 
346   PetscFunctionBegin;
347   /* Setup local scatters R_to_B and (optionally) R_to_D */
348   /* PCBDDCSetUpLocalWorkVectors should be called first! */
349   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
350 
351   /* Setup local neumann solver ksp_R */
352   /* PCBDDCSetUpLocalScatters should be called first! */
353   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
354 
355   /* Change global null space passed in by the user if change of basis has been requested */
356   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
357     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
358   }
359 
360   /*
361      Setup local correction and local part of coarse basis.
362      Gives back the dense local part of the coarse matrix in column major ordering
363   */
364   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
365 
366   /* Compute total number of coarse nodes and setup coarse solver */
367   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
368 
369   /* free */
370   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 
374 #undef __FUNCT__
375 #define __FUNCT__ "PCBDDCResetCustomization"
376 PetscErrorCode PCBDDCResetCustomization(PC pc)
377 {
378   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
379   PetscErrorCode ierr;
380 
381   PetscFunctionBegin;
382   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
385   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
387   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
388   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
389   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
390   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
391   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
392   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
393   PetscFunctionReturn(0);
394 }
395 
396 #undef __FUNCT__
397 #define __FUNCT__ "PCBDDCResetTopography"
398 PetscErrorCode PCBDDCResetTopography(PC pc)
399 {
400   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
401   PetscErrorCode ierr;
402 
403   PetscFunctionBegin;
404   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
405   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
406   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
407   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
408   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "PCBDDCResetSolvers"
414 PetscErrorCode PCBDDCResetSolvers(PC pc)
415 {
416   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
417   PetscScalar    *array;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
422   if (pcbddc->coarse_phi_B) {
423     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
424     ierr = PetscFree(array);CHKERRQ(ierr);
425   }
426   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
427   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
428   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
429   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
430   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
431   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
432   if (pcbddc->local_auxmat2) {
433     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
434     ierr = PetscFree(array);CHKERRQ(ierr);
435   }
436   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
437   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
438   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
439   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
440   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
441   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
442   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
443   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
444   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
445   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
446   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
447   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
448   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
449   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
450   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
451   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
457 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
458 {
459   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
460   PC_IS          *pcis = (PC_IS*)pc->data;
461   VecType        impVecType;
462   PetscInt       n_constraints,n_R,old_size;
463   PetscErrorCode ierr;
464 
465   PetscFunctionBegin;
466   if (!pcbddc->ConstraintMatrix) {
467     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
468   }
469   /* get sizes */
470   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
471   n_R = pcis->n-pcbddc->n_actual_vertices;
472   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
473   /* local work vectors (try to avoid unneeded work)*/
474   /* R nodes */
475   old_size = -1;
476   if (pcbddc->vec1_R) {
477     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
478   }
479   if (n_R != old_size) {
480     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
481     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
482     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
483     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
484     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
485     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
486   }
487   /* local primal dofs */
488   old_size = -1;
489   if (pcbddc->vec1_P) {
490     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
491   }
492   if (pcbddc->local_primal_size != old_size) {
493     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
494     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
495     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
496     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
497   }
498   /* local explicit constraints */
499   old_size = -1;
500   if (pcbddc->vec1_C) {
501     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
502   }
503   if (n_constraints && n_constraints != old_size) {
504     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
505     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
506     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
507     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
508   }
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "PCBDDCSetUpCorrection"
514 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
515 {
516   PetscErrorCode         ierr;
517   /* pointers to pcis and pcbddc */
518   PC_IS*                 pcis = (PC_IS*)pc->data;
519   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
520   /* submatrices of local problem */
521   Mat                    A_RV,A_VR,A_VV;
522   /* submatrices of local coarse problem */
523   Mat                    S_VV,S_CV,S_VC,S_CC;
524   /* working matrices */
525   Mat                    C_CR;
526   /* additional working stuff */
527   PC                     pc_R;
528   Mat                    F;
529   PetscBool              isLU,isCHOL,isILU;
530 
531   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
532   PetscScalar            *work;
533   PetscInt               *idx_V_B;
534   PetscInt               n,n_vertices,n_constraints;
535   PetscInt               i,n_R,n_D,n_B;
536   PetscBool              unsymmetric_check;
537   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
538   MatType                impMatType;
539   /* some shortcuts to scalars */
540   PetscScalar            one=1.0,m_one=-1.0;
541 
542   PetscFunctionBegin;
543   /* get number of vertices (corners plus constraints with change of basis)
544      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
545   n_vertices = pcbddc->n_actual_vertices;
546   n_constraints = pcbddc->local_primal_size-n_vertices;
547   /* Set Non-overlapping dimensions */
548   n_B = pcis->n_B; n_D = pcis->n - n_B;
549   n_R = pcis->n-n_vertices;
550 
551   /* Set types for local objects needed by BDDC precondtioner */
552   impMatType = MATSEQDENSE;
553 
554   /* vertices in boundary numbering */
555   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
556   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
557   if (i != n_vertices) {
558     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
559   }
560 
561   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
562   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
567   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
568   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
569   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
570   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
571 
572   unsymmetric_check = PETSC_FALSE;
573   /* allocate workspace */
574   n = 0;
575   if (n_constraints) {
576     n += n_R*n_constraints;
577   }
578   if (n_vertices) {
579     n = PetscMax(2*n_R*n_vertices,n);
580   }
581   if (!pcbddc->issym) {
582     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
583     unsymmetric_check = PETSC_TRUE;
584   }
585   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
586 
587   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
588   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
589   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
590   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
591   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
592   if (isLU || isILU || isCHOL) {
593     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
594   } else {
595     F = NULL;
596   }
597 
598   /* Precompute stuffs needed for preprocessing and application of BDDC*/
599   if (n_constraints) {
600     Mat M1,M2,M3;
601     IS  is_aux;
602     /* see if we can save some allocations */
603     if (pcbddc->local_auxmat2) {
604       PetscInt on_R,on_constraints;
605       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
606       if (on_R != n_R || on_constraints != n_constraints) {
607         PetscScalar *marray;
608 
609         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
610         ierr = PetscFree(marray);CHKERRQ(ierr);
611         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
612         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
613       }
614     }
615     /* auxiliary matrices */
616     if (!pcbddc->local_auxmat2) {
617       PetscScalar *marray;
618 
619       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
620       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
621       marray += n_R*n_constraints;
622       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
623     }
624 
625     /* Extract constraints on R nodes: C_{CR}  */
626     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
627     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
628     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
629 
630     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
631     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
632     for (i=0;i<n_constraints;i++) {
633       const PetscScalar *row_cmat_values;
634       const PetscInt    *row_cmat_indices;
635       PetscInt          size_of_constraint,j;
636 
637       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638       for (j=0;j<size_of_constraint;j++) {
639         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
640       }
641       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
642     }
643     if (F) {
644       Mat B;
645 
646       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
647       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
648       ierr = MatDestroy(&B);CHKERRQ(ierr);
649     } else {
650       PetscScalar *xarray;
651       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
652       for (i=0;i<n_constraints;i++) {
653         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
654         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
655         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
656         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
657         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
658       }
659       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
660     }
661 
662     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
663     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
664     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
665     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
666     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
667     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
668     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
669     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
670     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
671     ierr = MatDestroy(&M2);CHKERRQ(ierr);
672     ierr = MatDestroy(&M3);CHKERRQ(ierr);
673     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
674     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
675     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
676     ierr = MatDestroy(&M1);CHKERRQ(ierr);
677   }
678   /* Get submatrices from subdomain matrix */
679   if (n_vertices) {
680     Mat       newmat;
681     IS        is_aux;
682     PetscInt  ibs,mbs;
683     PetscBool issbaij;
684 
685     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
686     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
687     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
688     if (ibs != mbs) { /* need to convert to SEQAIJ */
689       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
690       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
691       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
692       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
694     } else {
695       /* this is safe */
696       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
697       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
698       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
699         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
700         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
701         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
703         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
704       } else {
705         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
706         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
707       }
708     }
709     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
710   }
711 
712   /* Matrix of coarse basis functions (local) */
713   if (pcbddc->coarse_phi_B) {
714     PetscInt on_B,on_primal,on_D=n_D;
715     if (pcbddc->coarse_phi_D) {
716       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
717     }
718     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
719     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
720       PetscScalar *marray;
721 
722       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
723       ierr = PetscFree(marray);CHKERRQ(ierr);
724       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
725       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
726       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
727       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
728     }
729   }
730 
731   if (!pcbddc->coarse_phi_B) {
732     PetscScalar *marray;
733 
734     n = n_B*pcbddc->local_primal_size;
735     if (pcbddc->switch_static || pcbddc->dbg_flag) {
736       n += n_D*pcbddc->local_primal_size;
737     }
738     if (!pcbddc->issym) {
739       n *= 2;
740     }
741     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
742     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
743     n = n_B*pcbddc->local_primal_size;
744     if (pcbddc->switch_static || pcbddc->dbg_flag) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
746       n += n_D*pcbddc->local_primal_size;
747     }
748     if (!pcbddc->issym) {
749       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
750       if (pcbddc->switch_static || pcbddc->dbg_flag) {
751         n = n_B*pcbddc->local_primal_size;
752         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
753       }
754     } else {
755       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
756       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
757       if (pcbddc->switch_static || pcbddc->dbg_flag) {
758         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
759         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
760       }
761     }
762   }
763   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
764   /* vertices */
765   if (n_vertices) {
766 
767     if (n_R) {
768       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
769       PetscBLASInt B_N,B_one = 1;
770       PetscScalar  *x,*y;
771 
772       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
773       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
774       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
775       if (F) {
776         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
777       } else {
778         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
779         for (i=0;i<n_vertices;i++) {
780           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
781           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
782           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
783           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
784           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
785         }
786         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
787       }
788       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
789       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
790       if (n_constraints) {
791         Mat B;
792         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
793         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
794         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
795         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
796         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
797         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
798         ierr = MatDestroy(&B);CHKERRQ(ierr);
799       }
800       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
801       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
802       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
803       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
804       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
805       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
806       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
807       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
808       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
809       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
810       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
811       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
812     } else {
813       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
814       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
815     }
816     /* coarse basis functions */
817     for (i=0;i<n_vertices;i++) {
818       PetscScalar *y;
819 
820       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
821       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
822       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
823       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825       y[n_B*i+idx_V_B[i]] = 1.0;
826       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
827       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
828 
829       if (pcbddc->switch_static || pcbddc->dbg_flag) {
830         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
831         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
832         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
835         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
836       }
837       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
838     }
839     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
840     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
841   }
842 
843   if (n_constraints) {
844     Mat B;
845 
846     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
847     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
848     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
849     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
850     if (n_vertices) {
851       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
852     }
853     ierr = MatDestroy(&B);CHKERRQ(ierr);
854     /* coarse basis functions */
855     for (i=0;i<n_constraints;i++) {
856       PetscScalar *y;
857 
858       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
859       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
861       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
864       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
865 
866       if (pcbddc->switch_static || pcbddc->dbg_flag) {
867         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
868         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
869         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
872         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
873       }
874       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
875     }
876   }
877 
878   /* compute other basis functions for non-symmetric problems */
879   if (!pcbddc->issym) {
880     Mat B,X;
881 
882     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
883 
884     if (n_constraints) {
885       Mat S_CCT,B_C;
886 
887       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
888       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
889       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
890       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
891       if (n_vertices) {
892         Mat B_V,S_VCT;
893 
894         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
895         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
896         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
897         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
898         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
899       }
900       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
901     }
902     if (n_vertices && n_R) {
903       Mat          A_VRT;
904       PetscBLASInt B_N,B_one = 1;
905 
906       if (!n_constraints) { /* if there are no constraints, reset work */
907         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
908       }
909       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
910       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
911       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
912       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
913       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
914     }
915 
916     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
917     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
918       for (i=0;i<pcbddc->local_primal_size;i++) {
919         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
920         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
921         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
922         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
923         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
924       }
925     } else {
926       for (i=0;i<pcbddc->local_primal_size;i++) {
927         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
928         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
929         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
930         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
932       }
933     }
934     ierr = MatDestroy(&B);CHKERRQ(ierr);
935     /* coarse basis functions */
936     for (i=0;i<pcbddc->local_primal_size;i++) {
937       PetscScalar *y;
938 
939       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
940       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
941       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
942       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944       if (i<n_vertices) {
945         y[n_B*i+idx_V_B[i]] = 1.0;
946       }
947       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
948       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
949 
950       if (pcbddc->switch_static || pcbddc->dbg_flag) {
951         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
952         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
953         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
956         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
957       }
958       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
959     }
960     ierr = MatDestroy(&X);CHKERRQ(ierr);
961   }
962 
963   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
964   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
965   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
966   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
967   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
968   /* Checking coarse_sub_mat and coarse basis functios */
969   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
970   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
971   if (pcbddc->dbg_flag) {
972     Mat         coarse_sub_mat;
973     Mat         AUXMAT,TM1,TM2,TM3,TM4;
974     Mat         coarse_phi_D,coarse_phi_B;
975     Mat         coarse_psi_D,coarse_psi_B;
976     Mat         A_II,A_BB,A_IB,A_BI;
977     Mat         C_B,CPHI;
978     IS          is_dummy;
979     Vec         mones;
980     MatType     checkmattype=MATSEQAIJ;
981     PetscReal   real_value;
982 
983     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
984     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
985     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
986     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
987     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
988     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
989     if (unsymmetric_check) {
990       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
991       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
992     }
993     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
994 
995     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
996     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
997     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
998     if (unsymmetric_check) {
999       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1000       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1001       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1003       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1004       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1006       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1007       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1008       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1009       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1010       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1011     } else {
1012       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1013       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1014       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1015       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1016       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1017       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1018       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1019       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1020     }
1021     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1023     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1024     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1025     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1026     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1027     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1028     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1029 
1030     /* check constraints */
1031     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1032     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1033     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1034     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1035     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1036     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1037     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1038     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1039     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1040     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1041     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1042     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1043 
1044     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1045     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1046     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1047     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1049     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1050     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1051     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1052     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1053     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1054     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1055     if (unsymmetric_check) {
1056       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1057       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1058     }
1059     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1060   }
1061 
1062   /* free memory */
1063   ierr = PetscFree(work);CHKERRQ(ierr);
1064   if (n_vertices) {
1065     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1066   }
1067   if (n_constraints) {
1068     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1069   }
1070   /* get back data */
1071   *coarse_submat_vals_n = coarse_submat_vals;
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1077 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1078 {
1079   Mat            *work_mat;
1080   IS             isrow_s,iscol_s;
1081   PetscBool      rsorted,csorted;
1082   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1083   PetscErrorCode ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1087   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1088   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1089   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1090 
1091   if (!rsorted) {
1092     const PetscInt *idxs;
1093     PetscInt *idxs_sorted,i;
1094 
1095     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1096     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1097     for (i=0;i<rsize;i++) {
1098       idxs_perm_r[i] = i;
1099     }
1100     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1101     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1102     for (i=0;i<rsize;i++) {
1103       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1104     }
1105     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1107   } else {
1108     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1109     isrow_s = isrow;
1110   }
1111 
1112   if (!csorted) {
1113     if (isrow == iscol) {
1114       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1115       iscol_s = isrow_s;
1116     } else {
1117       const PetscInt *idxs;
1118       PetscInt *idxs_sorted,i;
1119 
1120       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1121       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1122       for (i=0;i<csize;i++) {
1123         idxs_perm_c[i] = i;
1124       }
1125       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1126       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1127       for (i=0;i<csize;i++) {
1128         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1129       }
1130       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1131       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1132     }
1133   } else {
1134     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1135     iscol_s = iscol;
1136   }
1137 
1138   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1139 
1140   if (!rsorted || !csorted) {
1141     Mat      new_mat;
1142     IS       is_perm_r,is_perm_c;
1143 
1144     if (!rsorted) {
1145       PetscInt *idxs_r,i;
1146       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1147       for (i=0;i<rsize;i++) {
1148         idxs_r[idxs_perm_r[i]] = i;
1149       }
1150       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1151       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1152     } else {
1153       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1154     }
1155     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1156 
1157     if (!csorted) {
1158       if (isrow_s == iscol_s) {
1159         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1160         is_perm_c = is_perm_r;
1161       } else {
1162         PetscInt *idxs_c,i;
1163         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1164         for (i=0;i<csize;i++) {
1165           idxs_c[idxs_perm_c[i]] = i;
1166         }
1167         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1168         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1169       }
1170     } else {
1171       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1172     }
1173     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1174 
1175     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1176     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1177     work_mat[0] = new_mat;
1178     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1179     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1180   }
1181 
1182   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1183   *B = work_mat[0];
1184   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1185   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1186   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1192 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1193 {
1194   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1195   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1196   Mat            new_mat;
1197   IS             is_local,is_global;
1198   PetscInt       local_size;
1199   PetscBool      isseqaij;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1204   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1205   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1206   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1207   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1208   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1209   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1210 
1211   /* check */
1212   if (pcbddc->dbg_flag) {
1213     Vec       x,x_change;
1214     PetscReal error;
1215 
1216     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1217     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1218     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1219     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1221     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1222     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1225     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1226     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1227     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1228     ierr = VecDestroy(&x);CHKERRQ(ierr);
1229     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1230   }
1231 
1232   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1233   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1234   if (isseqaij) {
1235     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236   } else {
1237     Mat work_mat;
1238     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1239     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1240     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1241   }
1242   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1243 #if !defined(PETSC_USE_COMPLEX)
1244   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1245 #endif
1246   /*
1247   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1248   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1249   */
1250   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1256 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1257 {
1258   PC_IS*         pcis = (PC_IS*)(pc->data);
1259   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1260   IS             is_aux1,is_aux2;
1261   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1262   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1263   PetscInt       vbs,bs;
1264   PetscBT        bitmask;
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   /*
1269     No need to setup local scatters if
1270       - primal space is unchanged
1271         AND
1272       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1273         AND
1274       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1275   */
1276   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1277     PetscFunctionReturn(0);
1278   }
1279   /* destroy old objects */
1280   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1281   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1282   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1283   /* Set Non-overlapping dimensions */
1284   n_B = pcis->n_B; n_D = pcis->n - n_B;
1285   n_vertices = pcbddc->n_actual_vertices;
1286   /* create auxiliary bitmask */
1287   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1288   for (i=0;i<n_vertices;i++) {
1289     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1290   }
1291 
1292   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1293   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1294   for (i=0, n_R=0; i<pcis->n; i++) {
1295     if (!PetscBTLookup(bitmask,i)) {
1296       idx_R_local[n_R] = i;
1297       n_R++;
1298     }
1299   }
1300 
1301   /* Block code */
1302   vbs = 1;
1303   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1304   if (bs>1 && !(n_vertices%bs)) {
1305     PetscBool is_blocked = PETSC_TRUE;
1306     PetscInt  *vary;
1307     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1308     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1309     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1310     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1311     for (i=0; i<n_vertices/bs; i++) {
1312       if (vary[i]!=0 && vary[i]!=bs) {
1313         is_blocked = PETSC_FALSE;
1314         break;
1315       }
1316     }
1317     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1318       vbs = bs;
1319       for (i=0;i<n_R/vbs;i++) {
1320         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1321       }
1322     }
1323     ierr = PetscFree(vary);CHKERRQ(ierr);
1324   }
1325   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1326   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1327 
1328   /* print some info if requested */
1329   if (pcbddc->dbg_flag) {
1330     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1331     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1333     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1334     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1335     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1336     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1337     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1338   }
1339 
1340   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1341   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1342   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1343   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1344   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0; i<n_D; i++) {
1346     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1347   }
1348   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1349   for (i=0, j=0; i<n_R; i++) {
1350     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1351       aux_array1[j++] = i;
1352     }
1353   }
1354   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1355   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1356   for (i=0, j=0; i<n_B; i++) {
1357     if (!PetscBTLookup(bitmask,is_indices[i])) {
1358       aux_array2[j++] = i;
1359     }
1360   }
1361   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1362   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1363   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1364   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1365   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1366 
1367   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1368     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1369     for (i=0, j=0; i<n_R; i++) {
1370       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1371         aux_array1[j++] = i;
1372       }
1373     }
1374     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1375     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1376     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1377   }
1378   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1379   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1386 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1387 {
1388   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1389   PC_IS          *pcis = (PC_IS*)pc->data;
1390   PC             pc_temp;
1391   Mat            A_RR;
1392   MatReuse       reuse;
1393   PetscScalar    m_one = -1.0;
1394   PetscReal      value;
1395   PetscInt       n_D,n_R,ibs,mbs;
1396   PetscBool      use_exact,use_exact_reduced,issbaij;
1397   PetscErrorCode ierr;
1398   /* prefixes stuff */
1399   char           dir_prefix[256],neu_prefix[256],str_level[16];
1400   size_t         len;
1401 
1402   PetscFunctionBegin;
1403 
1404   /* compute prefixes */
1405   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1406   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1407   if (!pcbddc->current_level) {
1408     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1409     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1410     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1411     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1412   } else {
1413     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1414     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1415     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1416     len -= 15; /* remove "pc_bddc_coarse_" */
1417     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1418     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1419     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1420     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1421     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1422     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1423     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1424     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1425   }
1426 
1427   /* DIRICHLET PROBLEM */
1428   if (dirichlet) {
1429     if (pcbddc->issym) {
1430       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1431     }
1432     /* Matrix for Dirichlet problem is pcis->A_II */
1433     n_D = pcis->n - pcis->n_B;
1434     if (!pcbddc->ksp_D) { /* create object if not yet build */
1435       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1436       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1437       /* default */
1438       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1439       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1440       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1441       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1442       if (issbaij) {
1443         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1444       } else {
1445         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1446       }
1447       /* Allow user's customization */
1448       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1449       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1450     }
1451     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1774   PetscBool         boolforchange,qr_needed;
1775   PetscBT           touched,change_basis,qr_needed_idx;
1776   /* auxiliary stuff */
1777   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1778   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1779   PetscScalar       *temp_quadrature_constraint_work=NULL;
1780   /* some quantities */
1781   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1782   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1783 
1784   PetscFunctionBegin;
1785   /* Destroy Mat objects computed previously */
1786   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1787   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1788 
1789   /* print some info */
1790   if (pcbddc->dbg_flag) {
1791     IS       vertices;
1792     PetscInt nv,nedges,nfaces;
1793     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1794     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1795     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1796     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1798     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1799     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1801     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1802   }
1803 
1804   if (!pcbddc->adaptive_selection) {
1805     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1806     MatNullSpace nearnullsp;
1807     const Vec    *nearnullvecs;
1808     Vec          *localnearnullsp;
1809     PetscScalar  *array;
1810     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1811     PetscBool    nnsp_has_cnst;
1812     /* LAPACK working arrays for SVD or POD */
1813     PetscBool    skip_lapack;
1814     PetscScalar  *work;
1815     PetscReal    *singular_vals;
1816 #if defined(PETSC_USE_COMPLEX)
1817     PetscReal    *rwork;
1818 #endif
1819 #if defined(PETSC_MISSING_LAPACK_GESVD)
1820     PetscScalar  *temp_basis,*correlation_mat;
1821 #else
1822     PetscBLASInt dummy_int=1;
1823     PetscScalar  dummy_scalar=1.;
1824 #endif
1825 
1826     /* Get index sets for faces, edges and vertices from graph */
1827     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1828     /* free unneeded index sets */
1829     if (!pcbddc->use_vertices) {
1830       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1831     }
1832     if (!pcbddc->use_edges) {
1833       for (i=0;i<n_ISForEdges;i++) {
1834         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1835       }
1836       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1837       n_ISForEdges = 0;
1838     }
1839     if (!pcbddc->use_faces) {
1840       for (i=0;i<n_ISForFaces;i++) {
1841         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1842       }
1843       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1844       n_ISForFaces = 0;
1845     }
1846     /* HACKS (the following two blocks of code) */
1847     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1848       pcbddc->use_change_of_basis = PETSC_TRUE;
1849       if (!ISForEdges) {
1850         pcbddc->use_change_on_faces = PETSC_TRUE;
1851       }
1852     }
1853     if (pcbddc->NullSpace) {
1854       /* use_change_of_basis should be consistent among processors */
1855       PetscBool tbool[2],gbool[2];
1856       tbool[0] = pcbddc->use_change_of_basis;
1857       tbool[1] = pcbddc->use_change_on_faces;
1858       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1859       pcbddc->use_change_of_basis = gbool[0];
1860       pcbddc->use_change_on_faces = gbool[1];
1861     }
1862 
1863     /* check if near null space is attached to global mat */
1864     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1865     if (nearnullsp) {
1866       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1867       /* remove any stored info */
1868       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1869       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1870       /* store information for BDDC solver reuse */
1871       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1872       pcbddc->onearnullspace = nearnullsp;
1873       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1874       for (i=0;i<nnsp_size;i++) {
1875         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1876       }
1877     } else { /* if near null space is not provided BDDC uses constants by default */
1878       nnsp_size = 0;
1879       nnsp_has_cnst = PETSC_TRUE;
1880     }
1881     /* get max number of constraints on a single cc */
1882     max_constraints = nnsp_size;
1883     if (nnsp_has_cnst) max_constraints++;
1884 
1885     /*
1886          Evaluate maximum storage size needed by the procedure
1887          - temp_indices will contain start index of each constraint stored as follows
1888          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1889          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1890          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1891                                                                                                                                                            */
1892     total_counts = n_ISForFaces+n_ISForEdges;
1893     total_counts *= max_constraints;
1894     n_vertices = 0;
1895     if (ISForVertices) {
1896       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1897     }
1898     total_counts += n_vertices;
1899     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1900     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1901     total_counts = 0;
1902     max_size_of_constraint = 0;
1903     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1904       if (i<n_ISForEdges) {
1905         used_IS = &ISForEdges[i];
1906       } else {
1907         used_IS = &ISForFaces[i-n_ISForEdges];
1908       }
1909       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1910       total_counts += j;
1911       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1912     }
1913     total_counts *= max_constraints;
1914     total_counts += n_vertices;
1915     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1916     /* get local part of global near null space vectors */
1917     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1918     for (k=0;k<nnsp_size;k++) {
1919       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1920       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1921       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1922     }
1923 
1924     /* whether or not to skip lapack calls */
1925     skip_lapack = PETSC_TRUE;
1926     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1927 
1928     /* allocate some auxiliary stuff */
1929     if (!skip_lapack || pcbddc->use_qr_single) {
1930       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1931     } else {
1932       gidxs = NULL;
1933       permutation = NULL;
1934       temp_indices_to_constraint_work = NULL;
1935       temp_quadrature_constraint_work = NULL;
1936     }
1937 
1938     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1939     if (!skip_lapack) {
1940       PetscScalar temp_work;
1941 
1942 #if defined(PETSC_MISSING_LAPACK_GESVD)
1943       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1944       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1945       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1946       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1947 #if defined(PETSC_USE_COMPLEX)
1948       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1949 #endif
1950       /* now we evaluate the optimal workspace using query with lwork=-1 */
1951       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1953       lwork = -1;
1954       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1955 #if !defined(PETSC_USE_COMPLEX)
1956       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1957 #else
1958       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1959 #endif
1960       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1961       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1962 #else /* on missing GESVD */
1963       /* SVD */
1964       PetscInt max_n,min_n;
1965       max_n = max_size_of_constraint;
1966       min_n = max_constraints;
1967       if (max_size_of_constraint < max_constraints) {
1968         min_n = max_size_of_constraint;
1969         max_n = max_constraints;
1970       }
1971       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1972 #if defined(PETSC_USE_COMPLEX)
1973       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1974 #endif
1975       /* now we evaluate the optimal workspace using query with lwork=-1 */
1976       lwork = -1;
1977       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1978       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1979       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1980       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1981 #if !defined(PETSC_USE_COMPLEX)
1982       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1983 #else
1984       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1985 #endif
1986       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1987       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1988 #endif /* on missing GESVD */
1989       /* Allocate optimal workspace */
1990       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1991       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1992     }
1993     /* Now we can loop on constraining sets */
1994     total_counts = 0;
1995     temp_indices[0] = 0;
1996     /* vertices */
1997     if (ISForVertices) {
1998       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1999       if (nnsp_has_cnst) { /* consider all vertices */
2000         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2001         for (i=0;i<n_vertices;i++) {
2002           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2003           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2004           total_counts++;
2005         }
2006       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2007         PetscBool used_vertex;
2008         for (i=0;i<n_vertices;i++) {
2009           used_vertex = PETSC_FALSE;
2010           k = 0;
2011           while (!used_vertex && k<nnsp_size) {
2012             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2013             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2014               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2015               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2016               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2017               total_counts++;
2018               used_vertex = PETSC_TRUE;
2019             }
2020             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2021             k++;
2022           }
2023         }
2024       }
2025       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2026       n_vertices = total_counts;
2027     }
2028 
2029     /* edges and faces */
2030     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2031       if (ncc<n_ISForEdges) {
2032         used_IS = &ISForEdges[ncc];
2033         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2034       } else {
2035         used_IS = &ISForFaces[ncc-n_ISForEdges];
2036         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2037       }
2038       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2039       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2040       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2041       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2042       /* change of basis should not be performed on local periodic nodes */
2043       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2044       if (nnsp_has_cnst) {
2045         PetscScalar quad_value;
2046         temp_constraints++;
2047         if (!pcbddc->use_nnsp_true) {
2048           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2049         } else {
2050           quad_value = 1.0;
2051         }
2052         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2053         for (j=0;j<size_of_constraint;j++) {
2054           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2055         }
2056         /* sort by global ordering if using lapack subroutines (not needed!) */
2057         if (!skip_lapack || pcbddc->use_qr_single) {
2058           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2059           for (j=0;j<size_of_constraint;j++) {
2060             permutation[j]=j;
2061           }
2062           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2063           for (j=0;j<size_of_constraint;j++) {
2064             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2065           }
2066           for (j=0;j<size_of_constraint;j++) {
2067             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2068             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2069           }
2070           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2071           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2072         }
2073         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2074         total_counts++;
2075       }
2076       for (k=0;k<nnsp_size;k++) {
2077         PetscReal real_value;
2078         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2079         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2080         for (j=0;j<size_of_constraint;j++) {
2081           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2082         }
2083         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2084         /* check if array is null on the connected component */
2085         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2086         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2087         if (real_value > 0.0) { /* keep indices and values */
2088           /* sort by global ordering if using lapack subroutines */
2089           if (!skip_lapack || pcbddc->use_qr_single) {
2090             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2091             for (j=0;j<size_of_constraint;j++) {
2092               permutation[j]=j;
2093             }
2094             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2095             for (j=0;j<size_of_constraint;j++) {
2096               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2097               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2098             }
2099             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2100             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2101           }
2102           temp_constraints++;
2103           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2104           total_counts++;
2105         }
2106       }
2107       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2108       valid_constraints = temp_constraints;
2109       if (!pcbddc->use_nnsp_true && temp_constraints) {
2110         if (temp_constraints == 1) { /* just normalize the constraint */
2111           PetscScalar norm;
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2113           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2114           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2115           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2116         } else { /* perform SVD */
2117           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2118 
2119 #if defined(PETSC_MISSING_LAPACK_GESVD)
2120           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2121              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2122              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2123                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2124                 from that computed using LAPACKgesvd
2125              -> This is due to a different computation of eigenvectors in LAPACKheev
2126              -> The quality of the POD-computed basis will be the same */
2127           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2128           /* Store upper triangular part of correlation matrix */
2129           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2130           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2131           for (j=0;j<temp_constraints;j++) {
2132             for (k=0;k<j+1;k++) {
2133               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2134             }
2135           }
2136           /* compute eigenvalues and eigenvectors of correlation matrix */
2137           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2138           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2139 #if !defined(PETSC_USE_COMPLEX)
2140           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2141 #else
2142           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2143 #endif
2144           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2145           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2146           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2147           j = 0;
2148           while (j < temp_constraints && singular_vals[j] < tol) j++;
2149           total_counts = total_counts-j;
2150           valid_constraints = temp_constraints-j;
2151           /* scale and copy POD basis into used quadrature memory */
2152           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2154           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2155           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2156           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2157           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2158           if (j<temp_constraints) {
2159             PetscInt ii;
2160             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2161             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2162             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2163             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2164             for (k=0;k<temp_constraints-j;k++) {
2165               for (ii=0;ii<size_of_constraint;ii++) {
2166                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2167               }
2168             }
2169           }
2170 #else  /* on missing GESVD */
2171           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2172           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2173           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2174           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2175 #if !defined(PETSC_USE_COMPLEX)
2176           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2177 #else
2178           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2179 #endif
2180           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2181           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2182           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2183           k = temp_constraints;
2184           if (k > size_of_constraint) k = size_of_constraint;
2185           j = 0;
2186           while (j < k && singular_vals[k-j-1] < tol) j++;
2187           valid_constraints = k-j;
2188           total_counts = total_counts-temp_constraints+valid_constraints;
2189 #endif /* on missing GESVD */
2190         }
2191       }
2192       /* setting change_of_basis flag is safe now */
2193       if (boolforchange) {
2194         for (j=0;j<valid_constraints;j++) {
2195           PetscBTSet(change_basis,total_counts-j-1);
2196         }
2197       }
2198     }
2199     /* free workspace */
2200     if (!skip_lapack || pcbddc->use_qr_single) {
2201       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2202     }
2203     if (!skip_lapack) {
2204       ierr = PetscFree(work);CHKERRQ(ierr);
2205 #if defined(PETSC_USE_COMPLEX)
2206       ierr = PetscFree(rwork);CHKERRQ(ierr);
2207 #endif
2208       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2209 #if defined(PETSC_MISSING_LAPACK_GESVD)
2210       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2211       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2212 #endif
2213     }
2214     for (k=0;k<nnsp_size;k++) {
2215       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2216     }
2217     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2218     /* free index sets of faces, edges and vertices */
2219     for (i=0;i<n_ISForFaces;i++) {
2220       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2221     }
2222     if (n_ISForFaces) {
2223       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2224     }
2225     for (i=0;i<n_ISForEdges;i++) {
2226       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2227     }
2228     if (n_ISForEdges) {
2229       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2230     }
2231     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2232   } else {
2233     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2234     PetscInt        cum = 0;
2235 
2236     total_counts = 0;
2237     n_vertices = 0;
2238     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2239       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2240     }
2241     max_constraints = 0;
2242     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2243       total_counts += pcbddc->adaptive_constraints_n[i];
2244       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2245     }
2246     temp_indices = pcbddc->adaptive_constraints_ptrs;
2247     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2248     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2249 
2250 #if 0
2251     printf("Found %d totals\n",total_counts);
2252     for (i=0;i<total_counts;i++) {
2253       printf("const %d, start %d",i,temp_indices[i]);
2254       printf(" end %d:\n",temp_indices[i+1]);
2255       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2256         printf("  idxs %d",temp_indices_to_constraint[j]);
2257         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2258       }
2259     }
2260     for (i=0;i<n_vertices;i++) {
2261       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2262     }
2263     for (i=0;i<sub_schurs->n_subs;i++) {
2264       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2265     }
2266 #endif
2267 
2268     max_size_of_constraint = 0;
2269     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2270     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2271     /* Change of basis */
2272     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2273     if (pcbddc->use_change_of_basis) {
2274       cum = n_vertices;
2275       for (i=0;i<sub_schurs->n_subs;i++) {
2276         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2277           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2278             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2279           }
2280         }
2281         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2282       }
2283     }
2284   }
2285 
2286   /* map temp_indices_to_constraint in boundary numbering */
2287   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2288   if (i != temp_indices[total_counts]) {
2289     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2290   }
2291 
2292   /* set quantities in pcbddc data structure and store previous primal size */
2293   /* n_vertices defines the number of subdomain corners in the primal space */
2294   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2295   olocal_primal_size = pcbddc->local_primal_size;
2296   pcbddc->local_primal_size = total_counts;
2297   pcbddc->n_vertices = n_vertices;
2298   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2299 
2300   /* Create constraint matrix */
2301   /* The constraint matrix is used to compute the l2g map of primal dofs */
2302   /* so we need to set it up properly either with or without change of basis */
2303   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2304   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2305   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2306   /* array to compute a local numbering of constraints : vertices first then constraints */
2307   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2308   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2309   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2310   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2311   /* auxiliary stuff for basis change */
2312   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2313   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2314 
2315   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2316   total_primal_vertices=0;
2317   for (i=0;i<pcbddc->local_primal_size;i++) {
2318     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2319     if (size_of_constraint == 1) {
2320       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2321       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2322       aux_primal_minloc[total_primal_vertices]=0;
2323       total_primal_vertices++;
2324     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2325       PetscInt min_loc,min_index;
2326       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2327       /* find first untouched local node */
2328       k = 0;
2329       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2330       min_index = global_indices[k];
2331       min_loc = k;
2332       /* search the minimum among global nodes already untouched on the cc */
2333       for (k=1;k<size_of_constraint;k++) {
2334         /* there can be more than one constraint on a single connected component */
2335         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2336           min_index = global_indices[k];
2337           min_loc = k;
2338         }
2339       }
2340       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2341       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2342       aux_primal_minloc[total_primal_vertices]=min_loc;
2343       total_primal_vertices++;
2344     }
2345   }
2346   /* determine if a QR strategy is needed for change of basis */
2347   qr_needed = PETSC_FALSE;
2348   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2349   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2350     if (PetscBTLookup(change_basis,i)) {
2351       if (!pcbddc->use_qr_single) {
2352         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2353         j = 0;
2354         for (k=0;k<size_of_constraint;k++) {
2355           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2356             j++;
2357           }
2358         }
2359         /* found more than one primal dof on the cc */
2360         if (j > 1) {
2361           PetscBTSet(qr_needed_idx,i);
2362           qr_needed = PETSC_TRUE;
2363         }
2364       } else {
2365         PetscBTSet(qr_needed_idx,i);
2366         qr_needed = PETSC_TRUE;
2367       }
2368     }
2369   }
2370   /* free workspace */
2371   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2372 
2373   /* permute indices in order to have a sorted set of vertices */
2374   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2375 
2376   /* nonzero structure of constraint matrix */
2377   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2378   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2379   j=total_primal_vertices;
2380   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2381     if (!PetscBTLookup(change_basis,i)) {
2382       nnz[j]=temp_indices[i+1]-temp_indices[i];
2383       j++;
2384     }
2385   }
2386   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2387   ierr = PetscFree(nnz);CHKERRQ(ierr);
2388   /* set values in constraint matrix */
2389   for (i=0;i<total_primal_vertices;i++) {
2390     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2391   }
2392   total_counts = total_primal_vertices;
2393   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2394     if (!PetscBTLookup(change_basis,i)) {
2395       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2396       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2397       total_counts++;
2398     }
2399   }
2400   /* assembling */
2401   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2402   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2403   /*
2404   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2405   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2406   */
2407   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2408   if (pcbddc->use_change_of_basis) {
2409     /* dual and primal dofs on a single cc */
2410     PetscInt     dual_dofs,primal_dofs;
2411     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2412     PetscInt     primal_counter;
2413     /* working stuff for GEQRF */
2414     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2415     PetscBLASInt lqr_work;
2416     /* working stuff for UNGQR */
2417     PetscScalar  *gqr_work,lgqr_work_t;
2418     PetscBLASInt lgqr_work;
2419     /* working stuff for TRTRS */
2420     PetscScalar  *trs_rhs;
2421     PetscBLASInt Blas_NRHS;
2422     /* pointers for values insertion into change of basis matrix */
2423     PetscInt     *start_rows,*start_cols;
2424     PetscScalar  *start_vals;
2425     /* working stuff for values insertion */
2426     PetscBT      is_primal;
2427     /* matrix sizes */
2428     PetscInt     global_size,local_size;
2429     /* temporary change of basis */
2430     Mat          localChangeOfBasisMatrix;
2431     /* extra space for debugging */
2432     PetscScalar  *dbg_work;
2433 
2434     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2435     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2436     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2437     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2438     /* nonzeros for local mat */
2439     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2440     for (i=0;i<pcis->n;i++) nnz[i]=1;
2441     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2442       if (PetscBTLookup(change_basis,i)) {
2443         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2444         if (PetscBTLookup(qr_needed_idx,i)) {
2445           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2446         } else {
2447           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2448           /* get local primal index on the cc */
2449           j = 0;
2450           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2451           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2452         }
2453       }
2454     }
2455     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2456     ierr = PetscFree(nnz);CHKERRQ(ierr);
2457     /* Set initial identity in the matrix */
2458     for (i=0;i<pcis->n;i++) {
2459       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2460     }
2461 
2462     if (pcbddc->dbg_flag) {
2463       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2464       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2465     }
2466 
2467 
2468     /* Now we loop on the constraints which need a change of basis */
2469     /*
2470        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2471        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2472 
2473        Basic blocks of change of basis matrix T computed by
2474 
2475           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2476 
2477             | 1        0   ...        0         s_1/S |
2478             | 0        1   ...        0         s_2/S |
2479             |              ...                        |
2480             | 0        ...            1     s_{n-1}/S |
2481             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2482 
2483             with S = \sum_{i=1}^n s_i^2
2484             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2485                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2486 
2487           - QR decomposition of constraints otherwise
2488     */
2489     if (qr_needed) {
2490       /* space to store Q */
2491       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2492       /* first we issue queries for optimal work */
2493       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2494       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2495       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2496       lqr_work = -1;
2497       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2498       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2499       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2500       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2501       lgqr_work = -1;
2502       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2503       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2504       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2505       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2507       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2508       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2509       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2510       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2511       /* array to store scaling factors for reflectors */
2512       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2513       /* array to store rhs and solution of triangular solver */
2514       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2515       /* allocating workspace for check */
2516       if (pcbddc->dbg_flag) {
2517         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2518       }
2519     }
2520     /* array to store whether a node is primal or not */
2521     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2522     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2523     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2524     if (i != total_primal_vertices) {
2525       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2526     }
2527     for (i=0;i<total_primal_vertices;i++) {
2528       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2529     }
2530     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2531 
2532     /* loop on constraints and see whether or not they need a change of basis and compute it */
2533     /* -> using implicit ordering contained in temp_indices data */
2534     total_counts = pcbddc->n_vertices;
2535     primal_counter = total_counts;
2536     while (total_counts<pcbddc->local_primal_size) {
2537       primal_dofs = 1;
2538       if (PetscBTLookup(change_basis,total_counts)) {
2539         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2540         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2541           primal_dofs++;
2542         }
2543         /* get constraint info */
2544         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2545         dual_dofs = size_of_constraint-primal_dofs;
2546 
2547         if (pcbddc->dbg_flag) {
2548           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2549         }
2550 
2551         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2552 
2553           /* copy quadrature constraints for change of basis check */
2554           if (pcbddc->dbg_flag) {
2555             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2556           }
2557           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2558           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2559 
2560           /* compute QR decomposition of constraints */
2561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2562           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2563           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2564           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2565           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2566           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2567           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2568 
2569           /* explictly compute R^-T */
2570           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2571           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2572           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2573           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2576           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2577           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2578           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2579           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2580 
2581           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2582           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2583           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2584           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2585           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2586           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2587           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2588           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2589           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2590 
2591           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2592              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2593              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2594           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2595           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2596           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2597           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2598           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2600           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2601           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2602           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2603           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2604 
2605           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2606           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2607           /* insert cols for primal dofs */
2608           for (j=0;j<primal_dofs;j++) {
2609             start_vals = &qr_basis[j*size_of_constraint];
2610             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2611             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2612           }
2613           /* insert cols for dual dofs */
2614           for (j=0,k=0;j<dual_dofs;k++) {
2615             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2616               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2617               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2618               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2619               j++;
2620             }
2621           }
2622 
2623           /* check change of basis */
2624           if (pcbddc->dbg_flag) {
2625             PetscInt   ii,jj;
2626             PetscBool valid_qr=PETSC_TRUE;
2627             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2628             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2629             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2630             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2631             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2632             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2633             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2634             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2635             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2636             for (jj=0;jj<size_of_constraint;jj++) {
2637               for (ii=0;ii<primal_dofs;ii++) {
2638                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2639                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2640               }
2641             }
2642             if (!valid_qr) {
2643               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2644               for (jj=0;jj<size_of_constraint;jj++) {
2645                 for (ii=0;ii<primal_dofs;ii++) {
2646                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2647                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2648                   }
2649                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2650                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2651                   }
2652                 }
2653               }
2654             } else {
2655               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2656             }
2657           }
2658         } else { /* simple transformation block */
2659           PetscInt    row,col;
2660           PetscScalar val,norm;
2661 
2662           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2663           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2664           for (j=0;j<size_of_constraint;j++) {
2665             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2666             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2667             if (!PetscBTLookup(is_primal,row_B)) {
2668               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2669               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2670               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2671             } else {
2672               for (k=0;k<size_of_constraint;k++) {
2673                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2674                 if (row != col) {
2675                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2676                 } else {
2677                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2678                 }
2679                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2680               }
2681             }
2682           }
2683           if (pcbddc->dbg_flag) {
2684             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2685           }
2686         }
2687         /* increment primal counter */
2688         primal_counter += primal_dofs;
2689       } else {
2690         if (pcbddc->dbg_flag) {
2691           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2692         }
2693       }
2694       /* increment constraint counter total_counts */
2695       total_counts += primal_dofs;
2696     }
2697 
2698     /* free workspace */
2699     if (qr_needed) {
2700       if (pcbddc->dbg_flag) {
2701         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2702       }
2703       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2704       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2705       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2706       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2707       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2708     }
2709     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2710     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712 
2713     /* assembling of global change of variable */
2714     {
2715       Mat      tmat;
2716       PetscInt bs;
2717 
2718       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2719       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2720       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2721       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2722       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2723       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2724       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2725       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2726       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2727       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2728       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2729       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2730       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2731       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2732       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2733       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2734       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2735       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2736     }
2737     /* check */
2738     if (pcbddc->dbg_flag) {
2739       PetscReal error;
2740       Vec       x,x_change;
2741 
2742       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2743       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2744       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2745       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2746       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2747       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2748       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2749       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2750       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2751       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2752       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2753       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2754       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2755       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2756       ierr = VecDestroy(&x);CHKERRQ(ierr);
2757       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2758     }
2759 
2760     /* adapt sub_schurs computed (if any) */
2761     if (pcbddc->use_deluxe_scaling) {
2762       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2763       if (sub_schurs->n_subs_par_g) {
2764         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2765       }
2766       if (sub_schurs->S_Ej_all) {
2767         Mat S_1,S_2,tmat;
2768         IS is_all_N;
2769 
2770         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2771         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2772         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2773         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2774         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2775         sub_schurs->S_Ej_all = S_1;
2776         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2777         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2778         sub_schurs->sum_S_Ej_all = S_2;
2779         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2780       }
2781     }
2782     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2783   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2784     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2785     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2786   }
2787 
2788   /* set up change of basis context */
2789   if (pcbddc->ChangeOfBasisMatrix) {
2790     PCBDDCChange_ctx change_ctx;
2791 
2792     if (!pcbddc->new_global_mat) {
2793       PetscInt global_size,local_size;
2794 
2795       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2796       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2797       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2798       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2799       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2800       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2801       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2802       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2803       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2804     } else {
2805       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2806       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2807       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2808     }
2809     if (!pcbddc->user_ChangeOfBasisMatrix) {
2810       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2811       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2812     } else {
2813       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2814       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2815     }
2816     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2817     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2818     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2820   }
2821 
2822   /* get indices in local ordering for vertices and constraints */
2823   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2824     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2825     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2826   }
2827   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2828   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2829   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2830   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2831   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2832   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2833   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2834   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2835   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2836   /* set quantities in PCBDDC data struct */
2837   pcbddc->n_actual_vertices = i;
2838   /* check if a new primal space has been introduced */
2839   pcbddc->new_primal_space_local = PETSC_TRUE;
2840   if (olocal_primal_size == pcbddc->local_primal_size) {
2841     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2842     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2843     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2844   }
2845   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2846   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2847 
2848   /* flush dbg viewer */
2849   if (pcbddc->dbg_flag) {
2850     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2851   }
2852 
2853   /* free workspace */
2854   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2855   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2856   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2857   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2858   if (!pcbddc->adaptive_selection) {
2859     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2860     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2861   } else {
2862     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2863                       pcbddc->adaptive_constraints_ptrs,
2864                       pcbddc->adaptive_constraints_idxs,
2865                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2866     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2867   }
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2873 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2874 {
2875   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2876   PC_IS       *pcis = (PC_IS*)pc->data;
2877   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2878   PetscInt    ierr,i,vertex_size;
2879   PetscViewer viewer=pcbddc->dbg_viewer;
2880 
2881   PetscFunctionBegin;
2882   /* Reset previously computed graph */
2883   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2884   /* Init local Graph struct */
2885   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2886 
2887   /* Check validity of the csr graph passed in by the user */
2888   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2889     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2890   }
2891 
2892   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2893   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2894     PetscInt  *xadj,*adjncy;
2895     PetscInt  nvtxs;
2896     PetscBool flg_row=PETSC_FALSE;
2897 
2898     if (pcbddc->use_local_adj) {
2899 
2900       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2901       if (flg_row) {
2902         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2903         pcbddc->computed_rowadj = PETSC_TRUE;
2904       }
2905       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2906     } else if (pcbddc->current_level) { /* just compute subdomain's connected components for coarser levels */
2907       IS                     is_dummy;
2908       ISLocalToGlobalMapping l2gmap_dummy;
2909       PetscInt               j,sum;
2910       PetscInt               *cxadj,*cadjncy;
2911       const PetscInt         *idxs;
2912       PCBDDCGraph            graph;
2913       PetscBT                is_on_boundary;
2914 
2915       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2916       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2917       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2918       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2919       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2920       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2921       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2922       if (flg_row) {
2923         graph->xadj = xadj;
2924         graph->adjncy = adjncy;
2925       }
2926       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2927       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2928       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2929 
2930       if (pcbddc->dbg_flag) {
2931         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2932         for (i=0;i<graph->ncc;i++) {
2933           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2934         }
2935         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2936       }
2937 
2938       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2939       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2940       for (i=0;i<pcis->n_B;i++) {
2941         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2942       }
2943       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2944 
2945       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2946       sum = 0;
2947       for (i=0;i<graph->ncc;i++) {
2948         PetscInt sizecc = 0;
2949         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2950           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2951             sizecc++;
2952           }
2953         }
2954         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2955           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2956             cxadj[graph->queue[j]] = sizecc;
2957           }
2958         }
2959         sum += sizecc*sizecc;
2960       }
2961       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2962       sum = 0;
2963       for (i=0;i<pcis->n;i++) {
2964         PetscInt temp = cxadj[i];
2965         cxadj[i] = sum;
2966         sum += temp;
2967       }
2968       cxadj[pcis->n] = sum;
2969       for (i=0;i<graph->ncc;i++) {
2970         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2971           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2972             PetscInt k,sizecc = 0;
2973             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2974               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2975                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2976                 sizecc++;
2977               }
2978             }
2979           }
2980         }
2981       }
2982       if (pcis->n) {
2983         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2984       } else {
2985         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2986         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2987       }
2988       graph->xadj = 0;
2989       graph->adjncy = 0;
2990       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2991       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2992     }
2993   }
2994 
2995   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2996   vertex_size = 1;
2997   if (pcbddc->user_provided_isfordofs) {
2998     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2999       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3000       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3001         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3002         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3003       }
3004       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3005       pcbddc->n_ISForDofs = 0;
3006       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3007     }
3008     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3009     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3010   } else {
3011     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3012       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3013       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3014       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3015         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3016       }
3017     }
3018   }
3019 
3020   /* Setup of Graph */
3021   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3022     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3023   }
3024   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3025     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3026   }
3027   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3028 
3029   /* Graph's connected components analysis */
3030   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3031 
3032   /* print some info to stdout */
3033   if (pcbddc->dbg_flag) {
3034     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3035   }
3036 
3037   /* mark topography has done */
3038   pcbddc->recompute_topography = PETSC_FALSE;
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 #undef __FUNCT__
3043 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3044 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3045 {
3046   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3047   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3048   PetscErrorCode ierr;
3049 
3050   PetscFunctionBegin;
3051   n = 0;
3052   vertices = 0;
3053   if (pcbddc->ConstraintMatrix) {
3054     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3055     for (i=0;i<local_primal_size;i++) {
3056       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3057       if (size_of_constraint == 1) n++;
3058       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3059     }
3060     if (vertices_idx) {
3061       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3062       n = 0;
3063       for (i=0;i<local_primal_size;i++) {
3064         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3065         if (size_of_constraint == 1) {
3066           vertices[n++]=row_cmat_indices[0];
3067         }
3068         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3069       }
3070     }
3071   }
3072   *n_vertices = n;
3073   if (vertices_idx) *vertices_idx = vertices;
3074   PetscFunctionReturn(0);
3075 }
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3079 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3080 {
3081   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3082   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3083   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3084   PetscBT        touched;
3085   PetscErrorCode ierr;
3086 
3087     /* This function assumes that the number of local constraints per connected component
3088        is not greater than the number of nodes defined for the connected component
3089        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3090   PetscFunctionBegin;
3091   n = 0;
3092   constraints_index = 0;
3093   if (pcbddc->ConstraintMatrix) {
3094     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3095     max_size_of_constraint = 0;
3096     for (i=0;i<local_primal_size;i++) {
3097       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3098       if (size_of_constraint > 1) {
3099         n++;
3100       }
3101       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3102       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3103     }
3104     if (constraints_idx) {
3105       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3106       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3107       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3108       n = 0;
3109       for (i=0;i<local_primal_size;i++) {
3110         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3111         if (size_of_constraint > 1) {
3112           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3113           /* find first untouched local node */
3114           j = 0;
3115           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3116           min_index = row_cmat_global_indices[j];
3117           min_loc = j;
3118           /* search the minimum among nodes not yet touched on the connected component
3119              since there can be more than one constraint on a single cc */
3120           for (j=1;j<size_of_constraint;j++) {
3121             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3122               min_index = row_cmat_global_indices[j];
3123               min_loc = j;
3124             }
3125           }
3126           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3127           constraints_index[n++] = row_cmat_indices[min_loc];
3128         }
3129         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3130       }
3131       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3132       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3133     }
3134   }
3135   *n_constraints = n;
3136   if (constraints_idx) *constraints_idx = constraints_index;
3137   PetscFunctionReturn(0);
3138 }
3139 
3140 #undef __FUNCT__
3141 #define __FUNCT__ "PCBDDCSubsetNumbering"
3142 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3143 {
3144   Vec            local_vec,global_vec;
3145   IS             seqis,paris;
3146   VecScatter     scatter_ctx;
3147   PetscScalar    *array;
3148   PetscInt       *temp_global_dofs;
3149   PetscScalar    globalsum;
3150   PetscInt       i,j,s;
3151   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3152   PetscMPIInt    rank_prec_comm,size_prec_comm;
3153   PetscInt       *dof_sizes,*dof_displs;
3154   PetscBool      first_found;
3155   PetscErrorCode ierr;
3156 
3157   PetscFunctionBegin;
3158   /* mpi buffers */
3159   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3160   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3161   j = ( !rank_prec_comm ? size_prec_comm : 0);
3162   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3163   /* get maximum size of subset */
3164   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3165   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3166   max_local = 0;
3167   for (i=0;i<n_local_dofs;i++) {
3168     if (max_local < temp_global_dofs[i] ) {
3169       max_local = temp_global_dofs[i];
3170     }
3171   }
3172   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3173   max_global++;
3174   max_local = 0;
3175   for (i=0;i<n_local_dofs;i++) {
3176     if (max_local < local_dofs[i] ) {
3177       max_local = local_dofs[i];
3178     }
3179   }
3180   max_local++;
3181   /* allocate workspace */
3182   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3183   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3184   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3185   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3186   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3187   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3188   /* create scatter */
3189   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3190   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3191   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3192   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3193   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3194   /* init array */
3195   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3196   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3197   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3198   if (local_dofs_mult) {
3199     for (i=0;i<n_local_dofs;i++) {
3200       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3201     }
3202   } else {
3203     for (i=0;i<n_local_dofs;i++) {
3204       array[local_dofs[i]]=1.0;
3205     }
3206   }
3207   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3208   /* scatter into global vec and get total number of global dofs */
3209   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3210   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3211   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3212   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3213   /* Fill global_vec with cumulative function for global numbering */
3214   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3215   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3216   nlocals = 0;
3217   first_index = -1;
3218   first_found = PETSC_FALSE;
3219   for (i=0;i<s;i++) {
3220     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3221       first_found = PETSC_TRUE;
3222       first_index = i;
3223     }
3224     nlocals += (PetscInt)PetscRealPart(array[i]);
3225   }
3226   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3227   if (!rank_prec_comm) {
3228     dof_displs[0]=0;
3229     for (i=1;i<size_prec_comm;i++) {
3230       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3231     }
3232   }
3233   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3234   if (first_found) {
3235     array[first_index] += (PetscScalar)nlocals;
3236     old_index = first_index;
3237     for (i=first_index+1;i<s;i++) {
3238       if (PetscRealPart(array[i]) > 0.1) {
3239         array[i] += array[old_index];
3240         old_index = i;
3241       }
3242     }
3243   }
3244   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3245   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3246   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3247   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3248   /* get global ordering of local dofs */
3249   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3250   if (local_dofs_mult) {
3251     for (i=0;i<n_local_dofs;i++) {
3252       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3253     }
3254   } else {
3255     for (i=0;i<n_local_dofs;i++) {
3256       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3257     }
3258   }
3259   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3260   /* free workspace */
3261   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3262   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3263   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3264   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3265   /* return pointer to global ordering of local dofs */
3266   *global_numbering_subset = temp_global_dofs;
3267   PetscFunctionReturn(0);
3268 }
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3272 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3273 {
3274   PetscInt       i,j;
3275   PetscScalar    *alphas;
3276   PetscErrorCode ierr;
3277 
3278   PetscFunctionBegin;
3279   /* this implements stabilized Gram-Schmidt */
3280   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3281   for (i=0;i<n;i++) {
3282     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3283     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3284     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3285   }
3286   ierr = PetscFree(alphas);CHKERRQ(ierr);
3287   PetscFunctionReturn(0);
3288 }
3289 
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatISGetSubassemblingPattern"
3292 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3293 {
3294   Mat             subdomain_adj;
3295   IS              new_ranks,ranks_send_to;
3296   MatPartitioning partitioner;
3297   Mat_IS          *matis;
3298   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3299   PetscInt        prank;
3300   PetscMPIInt     size,rank,color;
3301   PetscInt        *xadj,*adjncy,*oldranks;
3302   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3303   PetscInt        i,local_size,threshold=0;
3304   PetscErrorCode  ierr;
3305   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3306   PetscSubcomm    subcomm;
3307 
3308   PetscFunctionBegin;
3309   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3310   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3311   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3312 
3313   /* Get info on mapping */
3314   matis = (Mat_IS*)(mat->data);
3315   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3316   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3317 
3318   /* build local CSR graph of subdomains' connectivity */
3319   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3320   xadj[0] = 0;
3321   xadj[1] = PetscMax(n_neighs-1,0);
3322   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3323   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3324 
3325   if (threshold) {
3326     PetscInt xadj_count = 0;
3327     for (i=1;i<n_neighs;i++) {
3328       if (n_shared[i] > threshold) {
3329         adjncy[xadj_count] = neighs[i];
3330         adjncy_wgt[xadj_count] = n_shared[i];
3331         xadj_count++;
3332       }
3333     }
3334     xadj[1] = xadj_count;
3335   } else {
3336     if (xadj[1]) {
3337       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3338       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3339     }
3340   }
3341   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3342   if (use_square) {
3343     for (i=0;i<xadj[1];i++) {
3344       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3345     }
3346   }
3347   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3348 
3349   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3350 
3351   /*
3352     Restrict work on active processes only.
3353   */
3354   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3355   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3356   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3357   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3358   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3359   if (color) {
3360     ierr = PetscFree(xadj);CHKERRQ(ierr);
3361     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3362     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3363   } else {
3364     PetscInt coarsening_ratio;
3365     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3366     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3367     prank = rank;
3368     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3369     /*
3370     for (i=0;i<size;i++) {
3371       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3372     }
3373     */
3374     for (i=0;i<xadj[1];i++) {
3375       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3376     }
3377     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3378     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3379     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3380 
3381     /* Partition */
3382     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3383     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3384     if (use_vwgt) {
3385       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3386       v_wgt[0] = local_size;
3387       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3388     }
3389     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3390     coarsening_ratio = size/n_subdomains;
3391     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3392     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3393     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3394     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3395 
3396     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3397     if (contiguous) {
3398       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3399     } else {
3400       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3401     }
3402     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3403     /* clean up */
3404     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3405     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3406     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3407     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3408   }
3409   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3410 
3411   /* assemble parallel IS for sends */
3412   i = 1;
3413   if (color) i=0;
3414   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3415 
3416   /* get back IS */
3417   *is_sends = ranks_send_to;
3418   PetscFunctionReturn(0);
3419 }
3420 
3421 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatISSubassemble"
3425 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3426 {
3427   Mat                    local_mat;
3428   Mat_IS                 *matis;
3429   IS                     is_sends_internal;
3430   PetscInt               rows,cols,new_local_rows;
3431   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3432   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3433   ISLocalToGlobalMapping l2gmap;
3434   PetscInt*              l2gmap_indices;
3435   const PetscInt*        is_indices;
3436   MatType                new_local_type;
3437   /* buffers */
3438   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3439   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3440   PetscInt               *recv_buffer_idxs_local;
3441   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3442   /* MPI */
3443   MPI_Comm               comm,comm_n;
3444   PetscSubcomm           subcomm;
3445   PetscMPIInt            n_sends,n_recvs,commsize;
3446   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3447   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3448   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3449   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3450   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3451   PetscErrorCode         ierr;
3452 
3453   PetscFunctionBegin;
3454   /* TODO: add missing checks */
3455   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3456   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3457   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3458   PetscValidLogicalCollectiveInt(mat,nis,7);
3459   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3460   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3461   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3462   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3463   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3464   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3465   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3466   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3467     PetscInt mrows,mcols,mnrows,mncols;
3468     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3469     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3470     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3471     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3472     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3473     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3474   }
3475   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3476   PetscValidLogicalCollectiveInt(mat,bs,0);
3477   /* prepare IS for sending if not provided */
3478   if (!is_sends) {
3479     PetscBool pcontig = PETSC_TRUE;
3480     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3481     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3482   } else {
3483     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3484     is_sends_internal = is_sends;
3485   }
3486 
3487   /* get pointer of MATIS data */
3488   matis = (Mat_IS*)mat->data;
3489 
3490   /* get comm */
3491   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3492 
3493   /* compute number of sends */
3494   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3495   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3496 
3497   /* compute number of receives */
3498   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3499   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3500   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3501   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3502   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3503   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3504   ierr = PetscFree(iflags);CHKERRQ(ierr);
3505 
3506   /* restrict comm if requested */
3507   subcomm = 0;
3508   destroy_mat = PETSC_FALSE;
3509   if (restrict_comm) {
3510     PetscMPIInt color,subcommsize;
3511 
3512     color = 0;
3513     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3514     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3515     subcommsize = commsize - subcommsize;
3516     /* check if reuse has been requested */
3517     if (reuse == MAT_REUSE_MATRIX) {
3518       if (*mat_n) {
3519         PetscMPIInt subcommsize2;
3520         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3521         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3522         comm_n = PetscObjectComm((PetscObject)*mat_n);
3523       } else {
3524         comm_n = PETSC_COMM_SELF;
3525       }
3526     } else { /* MAT_INITIAL_MATRIX */
3527       PetscMPIInt rank;
3528 
3529       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3530       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3531       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3532       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3533       comm_n = PetscSubcommChild(subcomm);
3534     }
3535     /* flag to destroy *mat_n if not significative */
3536     if (color) destroy_mat = PETSC_TRUE;
3537   } else {
3538     comm_n = comm;
3539   }
3540 
3541   /* prepare send/receive buffers */
3542   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3543   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3544   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3545   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3546   if (nis) {
3547     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3548   }
3549 
3550   /* Get data from local matrices */
3551   if (!isdense) {
3552     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3553     /* TODO: See below some guidelines on how to prepare the local buffers */
3554     /*
3555        send_buffer_vals should contain the raw values of the local matrix
3556        send_buffer_idxs should contain:
3557        - MatType_PRIVATE type
3558        - PetscInt        size_of_l2gmap
3559        - PetscInt        global_row_indices[size_of_l2gmap]
3560        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3561     */
3562   } else {
3563     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3564     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3565     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3566     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3567     send_buffer_idxs[1] = i;
3568     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3569     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3570     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3571     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3572     for (i=0;i<n_sends;i++) {
3573       ilengths_vals[is_indices[i]] = len*len;
3574       ilengths_idxs[is_indices[i]] = len+2;
3575     }
3576   }
3577   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3578   /* additional is (if any) */
3579   if (nis) {
3580     PetscMPIInt psum;
3581     PetscInt j;
3582     for (j=0,psum=0;j<nis;j++) {
3583       PetscInt plen;
3584       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3585       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3586       psum += len+1; /* indices + lenght */
3587     }
3588     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3589     for (j=0,psum=0;j<nis;j++) {
3590       PetscInt plen;
3591       const PetscInt *is_array_idxs;
3592       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3593       send_buffer_idxs_is[psum] = plen;
3594       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3595       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3596       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3597       psum += plen+1; /* indices + lenght */
3598     }
3599     for (i=0;i<n_sends;i++) {
3600       ilengths_idxs_is[is_indices[i]] = psum;
3601     }
3602     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3603   }
3604 
3605   buf_size_idxs = 0;
3606   buf_size_vals = 0;
3607   buf_size_idxs_is = 0;
3608   for (i=0;i<n_recvs;i++) {
3609     buf_size_idxs += (PetscInt)olengths_idxs[i];
3610     buf_size_vals += (PetscInt)olengths_vals[i];
3611     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3612   }
3613   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3614   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3615   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3616 
3617   /* get new tags for clean communications */
3618   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3619   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3620   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3621 
3622   /* allocate for requests */
3623   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3625   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3626   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3627   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3628   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3629 
3630   /* communications */
3631   ptr_idxs = recv_buffer_idxs;
3632   ptr_vals = recv_buffer_vals;
3633   ptr_idxs_is = recv_buffer_idxs_is;
3634   for (i=0;i<n_recvs;i++) {
3635     source_dest = onodes[i];
3636     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3637     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3638     ptr_idxs += olengths_idxs[i];
3639     ptr_vals += olengths_vals[i];
3640     if (nis) {
3641       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3642       ptr_idxs_is += olengths_idxs_is[i];
3643     }
3644   }
3645   for (i=0;i<n_sends;i++) {
3646     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3647     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3648     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3649     if (nis) {
3650       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3651     }
3652   }
3653   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3654   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3655 
3656   /* assemble new l2g map */
3657   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3658   ptr_idxs = recv_buffer_idxs;
3659   new_local_rows = 0;
3660   for (i=0;i<n_recvs;i++) {
3661     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3662     ptr_idxs += olengths_idxs[i];
3663   }
3664   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3665   ptr_idxs = recv_buffer_idxs;
3666   new_local_rows = 0;
3667   for (i=0;i<n_recvs;i++) {
3668     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3669     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3670     ptr_idxs += olengths_idxs[i];
3671   }
3672   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3673   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3674   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3675 
3676   /* infer new local matrix type from received local matrices type */
3677   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3678   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3679   if (n_recvs) {
3680     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3681     ptr_idxs = recv_buffer_idxs;
3682     for (i=0;i<n_recvs;i++) {
3683       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3684         new_local_type_private = MATAIJ_PRIVATE;
3685         break;
3686       }
3687       ptr_idxs += olengths_idxs[i];
3688     }
3689     switch (new_local_type_private) {
3690       case MATDENSE_PRIVATE:
3691         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3692           new_local_type = MATSEQAIJ;
3693           bs = 1;
3694         } else { /* if I receive only 1 dense matrix */
3695           new_local_type = MATSEQDENSE;
3696           bs = 1;
3697         }
3698         break;
3699       case MATAIJ_PRIVATE:
3700         new_local_type = MATSEQAIJ;
3701         bs = 1;
3702         break;
3703       case MATBAIJ_PRIVATE:
3704         new_local_type = MATSEQBAIJ;
3705         break;
3706       case MATSBAIJ_PRIVATE:
3707         new_local_type = MATSEQSBAIJ;
3708         break;
3709       default:
3710         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3711         break;
3712     }
3713   } else { /* by default, new_local_type is seqdense */
3714     new_local_type = MATSEQDENSE;
3715     bs = 1;
3716   }
3717 
3718   /* create MATIS object if needed */
3719   if (reuse == MAT_INITIAL_MATRIX) {
3720     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3721     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3722   } else {
3723     /* it also destroys the local matrices */
3724     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3725   }
3726   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3727   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3728 
3729   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3730 
3731   /* Global to local map of received indices */
3732   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3733   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3734   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3735 
3736   /* restore attributes -> type of incoming data and its size */
3737   buf_size_idxs = 0;
3738   for (i=0;i<n_recvs;i++) {
3739     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3740     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3741     buf_size_idxs += (PetscInt)olengths_idxs[i];
3742   }
3743   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3744 
3745   /* set preallocation */
3746   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3747   if (!newisdense) {
3748     PetscInt *new_local_nnz=0;
3749 
3750     ptr_vals = recv_buffer_vals;
3751     ptr_idxs = recv_buffer_idxs_local;
3752     if (n_recvs) {
3753       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3754     }
3755     for (i=0;i<n_recvs;i++) {
3756       PetscInt j;
3757       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3758         for (j=0;j<*(ptr_idxs+1);j++) {
3759           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3760         }
3761       } else {
3762         /* TODO */
3763       }
3764       ptr_idxs += olengths_idxs[i];
3765     }
3766     if (new_local_nnz) {
3767       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3768       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3769       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3770       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3771       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3772       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3773     } else {
3774       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3775     }
3776     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3777   } else {
3778     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3779   }
3780 
3781   /* set values */
3782   ptr_vals = recv_buffer_vals;
3783   ptr_idxs = recv_buffer_idxs_local;
3784   for (i=0;i<n_recvs;i++) {
3785     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3786       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3787       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3788       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3789       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3790       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3791     } else {
3792       /* TODO */
3793     }
3794     ptr_idxs += olengths_idxs[i];
3795     ptr_vals += olengths_vals[i];
3796   }
3797   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3799   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3800   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3801   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3802   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3803 
3804 #if 0
3805   if (!restrict_comm) { /* check */
3806     Vec       lvec,rvec;
3807     PetscReal infty_error;
3808 
3809     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3810     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3811     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3812     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3813     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3814     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3815     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3816     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3817     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3818   }
3819 #endif
3820 
3821   /* assemble new additional is (if any) */
3822   if (nis) {
3823     PetscInt **temp_idxs,*count_is,j,psum;
3824 
3825     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3826     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3827     ptr_idxs = recv_buffer_idxs_is;
3828     psum = 0;
3829     for (i=0;i<n_recvs;i++) {
3830       for (j=0;j<nis;j++) {
3831         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3832         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3833         psum += plen;
3834         ptr_idxs += plen+1; /* shift pointer to received data */
3835       }
3836     }
3837     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3838     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3839     for (i=1;i<nis;i++) {
3840       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3841     }
3842     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3843     ptr_idxs = recv_buffer_idxs_is;
3844     for (i=0;i<n_recvs;i++) {
3845       for (j=0;j<nis;j++) {
3846         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3847         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3848         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3849         ptr_idxs += plen+1; /* shift pointer to received data */
3850       }
3851     }
3852     for (i=0;i<nis;i++) {
3853       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3854       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3855       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3856     }
3857     ierr = PetscFree(count_is);CHKERRQ(ierr);
3858     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3859     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3860   }
3861   /* free workspace */
3862   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3863   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3864   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3865   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3866   if (isdense) {
3867     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3868     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3869   } else {
3870     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3871   }
3872   if (nis) {
3873     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3874     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3875   }
3876   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3877   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3878   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3879   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3880   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3881   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3882   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3883   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3884   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3885   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3886   ierr = PetscFree(onodes);CHKERRQ(ierr);
3887   if (nis) {
3888     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3889     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3890     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3891   }
3892   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3893   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3894     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3895     for (i=0;i<nis;i++) {
3896       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3897     }
3898   }
3899   PetscFunctionReturn(0);
3900 }
3901 
3902 /* temporary hack into ksp private data structure */
3903 #include <petsc-private/kspimpl.h>
3904 
3905 #undef __FUNCT__
3906 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3907 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3908 {
3909   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3910   PC_IS                  *pcis = (PC_IS*)pc->data;
3911   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3912   MatNullSpace           CoarseNullSpace=NULL;
3913   ISLocalToGlobalMapping coarse_islg;
3914   IS                     coarse_is,*isarray;
3915   PetscInt               i,im_active=-1,active_procs=-1;
3916   PetscInt               nis,nisdofs,nisneu;
3917   PC                     pc_temp;
3918   PCType                 coarse_pc_type;
3919   KSPType                coarse_ksp_type;
3920   PetscBool              multilevel_requested,multilevel_allowed;
3921   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3922   Mat                    t_coarse_mat_is;
3923   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3924   PetscMPIInt            all_procs;
3925   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3926   PetscBool              compute_vecs = PETSC_FALSE;
3927   PetscScalar            *array;
3928   PetscErrorCode         ierr;
3929 
3930   PetscFunctionBegin;
3931   /* Assign global numbering to coarse dofs */
3932   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3933     PetscInt ocoarse_size;
3934     compute_vecs = PETSC_TRUE;
3935     ocoarse_size = pcbddc->coarse_size;
3936     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3937     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3938     /* see if we can avoid some work */
3939     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3940       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3941         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3942         coarse_reuse = PETSC_FALSE;
3943       } else { /* we can safely reuse already computed coarse matrix */
3944         coarse_reuse = PETSC_TRUE;
3945       }
3946     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3947       coarse_reuse = PETSC_FALSE;
3948     }
3949     /* reset any subassembling information */
3950     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3951     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3952   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3953     coarse_reuse = PETSC_TRUE;
3954   }
3955 
3956   /* count "active" (i.e. with positive local size) and "void" processes */
3957   im_active = !!(pcis->n);
3958   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3959   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3960   void_procs = all_procs-active_procs;
3961   csin_type_simple = PETSC_TRUE;
3962   redist = PETSC_FALSE;
3963   if (pcbddc->current_level && void_procs) {
3964     csin_ml = PETSC_TRUE;
3965     ncoarse_ml = void_procs;
3966     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3967     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3968       csin_ds = PETSC_TRUE;
3969       ncoarse_ds = pcbddc->redistribute_coarse;
3970       redist = PETSC_TRUE;
3971     } else {
3972       csin_ds = PETSC_TRUE;
3973       ncoarse_ds = active_procs;
3974       redist = PETSC_TRUE;
3975     }
3976   } else {
3977     csin_ml = PETSC_FALSE;
3978     ncoarse_ml = all_procs;
3979     if (void_procs) {
3980       csin_ds = PETSC_TRUE;
3981       ncoarse_ds = void_procs;
3982       csin_type_simple = PETSC_FALSE;
3983     } else {
3984       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3985         csin_ds = PETSC_TRUE;
3986         ncoarse_ds = pcbddc->redistribute_coarse;
3987         redist = PETSC_TRUE;
3988       } else {
3989         csin_ds = PETSC_FALSE;
3990         ncoarse_ds = all_procs;
3991       }
3992     }
3993   }
3994 
3995   /*
3996     test if we can go multilevel: three conditions must be satisfied:
3997     - we have not exceeded the number of levels requested
3998     - we can actually subassemble the active processes
3999     - we can find a suitable number of MPI processes where we can place the subassembled problem
4000   */
4001   multilevel_allowed = PETSC_FALSE;
4002   multilevel_requested = PETSC_FALSE;
4003   if (pcbddc->current_level < pcbddc->max_levels) {
4004     multilevel_requested = PETSC_TRUE;
4005     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4006       multilevel_allowed = PETSC_FALSE;
4007     } else {
4008       multilevel_allowed = PETSC_TRUE;
4009     }
4010   }
4011   /* determine number of process partecipating to coarse solver */
4012   if (multilevel_allowed) {
4013     ncoarse = ncoarse_ml;
4014     csin = csin_ml;
4015     redist = PETSC_FALSE;
4016   } else {
4017     ncoarse = ncoarse_ds;
4018     csin = csin_ds;
4019   }
4020 
4021   /* creates temporary l2gmap and IS for coarse indexes */
4022   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4023   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4024 
4025   /* creates temporary MATIS object for coarse matrix */
4026   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4027   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4028   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4029   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4030 #if 0
4031   {
4032     PetscViewer viewer;
4033     char filename[256];
4034     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4035     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4036     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4037     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4038     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4039   }
4040 #endif
4041   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4042   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4043   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4045   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4046 
4047   /* compute dofs splitting and neumann boundaries for coarse dofs */
4048   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4049     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4050     const PetscInt         *idxs;
4051     ISLocalToGlobalMapping tmap;
4052 
4053     /* create map between primal indices (in local representative ordering) and local primal numbering */
4054     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4055     /* allocate space for temporary storage */
4056     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4057     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4058     /* allocate for IS array */
4059     nisdofs = pcbddc->n_ISForDofsLocal;
4060     nisneu = !!pcbddc->NeumannBoundariesLocal;
4061     nis = nisdofs + nisneu;
4062     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4063     /* dofs splitting */
4064     for (i=0;i<nisdofs;i++) {
4065       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4066       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4067       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4068       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4069       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4070       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4071       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4072       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4073     }
4074     /* neumann boundaries */
4075     if (pcbddc->NeumannBoundariesLocal) {
4076       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4077       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4078       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4079       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4080       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4081       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4082       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4083       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4084     }
4085     /* free memory */
4086     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4087     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4088     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4089   } else {
4090     nis = 0;
4091     nisdofs = 0;
4092     nisneu = 0;
4093     isarray = NULL;
4094   }
4095   /* destroy no longer needed map */
4096   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4097 
4098   /* restrict on coarse candidates (if needed) */
4099   coarse_mat_is = NULL;
4100   if (csin) {
4101     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4102       if (redist) {
4103         PetscMPIInt rank;
4104         PetscInt    spc,n_spc_p1,dest[1],destsize;
4105 
4106         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4107         spc = active_procs/ncoarse;
4108         n_spc_p1 = active_procs%ncoarse;
4109         if (im_active) {
4110           destsize = 1;
4111           if (rank > n_spc_p1*(spc+1)-1) {
4112             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4113           } else {
4114             dest[0] = rank/(spc+1);
4115           }
4116         } else {
4117           destsize = 0;
4118         }
4119         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4120       } else if (csin_type_simple) {
4121         PetscMPIInt rank;
4122         PetscInt    issize,isidx;
4123 
4124         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4125         if (im_active) {
4126           issize = 1;
4127           isidx = (PetscInt)rank;
4128         } else {
4129           issize = 0;
4130           isidx = -1;
4131         }
4132         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4133       } else { /* get a suitable subassembling pattern from MATIS code */
4134         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4135       }
4136 
4137       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4138       if (!redist || ncoarse <= void_procs) {
4139         PetscInt ncoarse_cand,tissize,*nisindices;
4140         PetscInt *coarse_candidates;
4141         const PetscInt* tisindices;
4142 
4143         /* get coarse candidates' ranks in pc communicator */
4144         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4145         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4146         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4147           if (!coarse_candidates[i]) {
4148             coarse_candidates[ncoarse_cand++]=i;
4149           }
4150         }
4151         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4152 
4153 
4154         if (pcbddc->dbg_flag) {
4155           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4156           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4157           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4158           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4159           for (i=0;i<ncoarse_cand;i++) {
4160             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4161           }
4162           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4163           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4164         }
4165         /* shift the pattern on coarse candidates */
4166         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4167         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4168         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4169         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4170         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4171         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4172         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4173       }
4174       if (pcbddc->dbg_flag) {
4175         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4176         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4177         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4178         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4179       }
4180     }
4181     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4182     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4183   } else {
4184     if (pcbddc->dbg_flag) {
4185       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4186       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4187       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4188     }
4189     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4190     coarse_mat_is = t_coarse_mat_is;
4191   }
4192 
4193   /* create local to global scatters for coarse problem */
4194   if (compute_vecs) {
4195     PetscInt lrows;
4196     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4197     if (coarse_mat_is) {
4198       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4199     } else {
4200       lrows = 0;
4201     }
4202     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4203     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4204     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4205     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4206     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4207   }
4208   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4209   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4210 
4211   /* set defaults for coarse KSP and PC */
4212   if (multilevel_allowed) {
4213     coarse_ksp_type = KSPRICHARDSON;
4214     coarse_pc_type = PCBDDC;
4215   } else {
4216     coarse_ksp_type = KSPPREONLY;
4217     coarse_pc_type = PCREDUNDANT;
4218   }
4219 
4220   /* print some info if requested */
4221   if (pcbddc->dbg_flag) {
4222     if (!multilevel_allowed) {
4223       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4224       if (multilevel_requested) {
4225         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4226       } else if (pcbddc->max_levels) {
4227         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4228       }
4229       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4230     }
4231   }
4232 
4233   /* create the coarse KSP object only once with defaults */
4234   if (coarse_mat_is) {
4235     MatReuse coarse_mat_reuse;
4236     PetscViewer dbg_viewer = NULL;
4237     if (pcbddc->dbg_flag) {
4238       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4239       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4240     }
4241     if (!pcbddc->coarse_ksp) {
4242       char prefix[256],str_level[16];
4243       size_t len;
4244       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4245       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4246       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4247       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4248       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4249       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4250       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4251       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4252       /* prefix */
4253       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4254       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4255       if (!pcbddc->current_level) {
4256         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4257         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4258       } else {
4259         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4260         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4261         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4262         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4263         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4264         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4265       }
4266       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4267       /* allow user customization */
4268       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4269       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4270     }
4271 
4272     /* get some info after set from options */
4273     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4274     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4275     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4276     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4277     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4278       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4279       isbddc = PETSC_FALSE;
4280     }
4281     if (isredundant) {
4282       KSP inner_ksp;
4283       PC inner_pc;
4284       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4285       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4286       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4287     }
4288 
4289     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4290     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4291     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4292     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4293     if (nisdofs) {
4294       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4295       for (i=0;i<nisdofs;i++) {
4296         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4297       }
4298     }
4299     if (nisneu) {
4300       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4301       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4302     }
4303 
4304     /* assemble coarse matrix */
4305     if (coarse_reuse) {
4306       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4307       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4308       coarse_mat_reuse = MAT_REUSE_MATRIX;
4309     } else {
4310       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4311     }
4312     if (isbddc || isnn) {
4313       if (pcbddc->coarsening_ratio > 1) {
4314         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4315           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4316           if (pcbddc->dbg_flag) {
4317             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4318             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4319             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4320             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4321           }
4322         }
4323         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4324       } else {
4325         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4326         coarse_mat = coarse_mat_is;
4327       }
4328     } else {
4329       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4330     }
4331     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4332 
4333     /* propagate symmetry info to coarse matrix */
4334     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4335     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4336 
4337     /* set operators */
4338     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4339     if (pcbddc->dbg_flag) {
4340       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4341     }
4342   } else { /* processes non partecipating to coarse solver (if any) */
4343     coarse_mat = 0;
4344   }
4345   ierr = PetscFree(isarray);CHKERRQ(ierr);
4346 #if 0
4347   {
4348     PetscViewer viewer;
4349     char filename[256];
4350     sprintf(filename,"coarse_mat.m");
4351     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4352     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4353     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4354     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4355   }
4356 #endif
4357 
4358   /* Compute coarse null space (special handling by BDDC only) */
4359   if (pcbddc->NullSpace) {
4360     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4361   }
4362 
4363   if (pcbddc->coarse_ksp) {
4364     Vec crhs,csol;
4365     PetscBool ispreonly;
4366     if (CoarseNullSpace) {
4367       if (isbddc) {
4368         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4369       } else {
4370         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4371       }
4372     }
4373     /* setup coarse ksp */
4374     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4375     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4376     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4377     /* hack */
4378     if (!csol) {
4379       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4380     }
4381     if (!crhs) {
4382       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4383     }
4384     /* Check coarse problem if in debug mode or if solving with an iterative method */
4385     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4386     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4387       KSP       check_ksp;
4388       KSPType   check_ksp_type;
4389       PC        check_pc;
4390       Vec       check_vec,coarse_vec;
4391       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4392       PetscInt  its;
4393       PetscBool compute_eigs;
4394       PetscReal *eigs_r,*eigs_c;
4395       PetscInt  neigs;
4396       const char *prefix;
4397 
4398       /* Create ksp object suitable for estimation of extreme eigenvalues */
4399       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4400       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4401       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4402       if (ispreonly) {
4403         check_ksp_type = KSPPREONLY;
4404         compute_eigs = PETSC_FALSE;
4405       } else {
4406         check_ksp_type = KSPGMRES;
4407         compute_eigs = PETSC_TRUE;
4408       }
4409       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4410       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4411       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4412       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4413       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4414       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4415       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4416       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4417       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4418       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4419       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4420       /* create random vec */
4421       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4422       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4423       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4424       if (CoarseNullSpace) {
4425         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4426       }
4427       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4428       /* solve coarse problem */
4429       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4430       if (CoarseNullSpace) {
4431         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4432       }
4433       /* set eigenvalue estimation if preonly has not been requested */
4434       if (compute_eigs) {
4435         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4436         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4437         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4438         lambda_max = eigs_r[neigs-1];
4439         lambda_min = eigs_r[0];
4440         if (pcbddc->use_coarse_estimates) {
4441           if (lambda_max>lambda_min) {
4442             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4443             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4444           }
4445         }
4446       }
4447 
4448       /* check coarse problem residual error */
4449       if (pcbddc->dbg_flag) {
4450         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4451         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4452         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4453         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4454         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4455         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4456         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4457         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4458         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4459         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4460         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4461         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4462         if (compute_eigs) {
4463           PetscReal lambda_max_s,lambda_min_s;
4464           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4465           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4466           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4467           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4468           for (i=0;i<neigs;i++) {
4469             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4470           }
4471         }
4472         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4473         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4474       }
4475       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4476       if (compute_eigs) {
4477         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4478         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4479       }
4480     }
4481   }
4482   /* print additional info */
4483   if (pcbddc->dbg_flag) {
4484     /* waits until all processes reaches this point */
4485     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4486     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4487     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4488   }
4489 
4490   /* free memory */
4491   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4492   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4493   PetscFunctionReturn(0);
4494 }
4495 
4496 #undef __FUNCT__
4497 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4498 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4499 {
4500   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4501   PC_IS*         pcis = (PC_IS*)pc->data;
4502   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4503   PetscInt       i,coarse_size=0;
4504   PetscInt       *local_primal_indices=NULL;
4505   PetscErrorCode ierr;
4506 
4507   PetscFunctionBegin;
4508   /* Compute global number of coarse dofs */
4509   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4510     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4511   }
4512   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4513 
4514   /* check numbering */
4515   if (pcbddc->dbg_flag) {
4516     PetscScalar coarsesum,*array;
4517     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4518 
4519     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4520     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4521     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4522     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4523     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4524     for (i=0;i<pcbddc->local_primal_size;i++) {
4525       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4526     }
4527     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4528     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4529     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4530     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4531     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4532     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4534     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4535     for (i=0;i<pcis->n;i++) {
4536       if (array[i] == 1.0) {
4537         set_error = PETSC_TRUE;
4538         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4539       }
4540     }
4541     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4542     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4543     for (i=0;i<pcis->n;i++) {
4544       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4545     }
4546     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4547     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4548     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4549     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4550     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4551     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4552     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4553       PetscInt *gidxs;
4554 
4555       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4556       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4557       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4558       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4559       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4560       for (i=0;i<pcbddc->local_primal_size;i++) {
4561         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
4562       }
4563       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4564       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4565     }
4566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4567     if (set_error_reduced) {
4568       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4569     }
4570   }
4571   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4572   /* get back data */
4573   *coarse_size_n = coarse_size;
4574   *local_primal_indices_n = local_primal_indices;
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 #undef __FUNCT__
4579 #define __FUNCT__ "PCBDDCGlobalToLocal"
4580 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4581 {
4582   IS             localis_t;
4583   PetscInt       i,lsize,*idxs,n;
4584   PetscScalar    *vals;
4585   PetscErrorCode ierr;
4586 
4587   PetscFunctionBegin;
4588   /* get indices in local ordering exploiting local to global map */
4589   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4590   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4591   for (i=0;i<lsize;i++) vals[i] = 1.0;
4592   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4593   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4594   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4595   if (idxs) { /* multilevel guard */
4596     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4597   }
4598   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4599   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4600   ierr = PetscFree(vals);CHKERRQ(ierr);
4601   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4602   /* now compute set in local ordering */
4603   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4604   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4605   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4606   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4607   for (i=0,lsize=0;i<n;i++) {
4608     if (PetscRealPart(vals[i]) > 0.5) {
4609       lsize++;
4610     }
4611   }
4612   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4613   for (i=0,lsize=0;i<n;i++) {
4614     if (PetscRealPart(vals[i]) > 0.5) {
4615       idxs[lsize++] = i;
4616     }
4617   }
4618   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4619   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4620   *localis = localis_t;
4621   PetscFunctionReturn(0);
4622 }
4623 
4624 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4625 #undef __FUNCT__
4626 #define __FUNCT__ "PCBDDCMatMult_Private"
4627 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4628 {
4629   PCBDDCChange_ctx change_ctx;
4630   PetscErrorCode   ierr;
4631 
4632   PetscFunctionBegin;
4633   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4634   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4635   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4636   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4637   PetscFunctionReturn(0);
4638 }
4639 
4640 #undef __FUNCT__
4641 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4642 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4643 {
4644   PCBDDCChange_ctx change_ctx;
4645   PetscErrorCode   ierr;
4646 
4647   PetscFunctionBegin;
4648   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4649   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4650   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4651   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4652   PetscFunctionReturn(0);
4653 }
4654 
4655 #undef __FUNCT__
4656 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4657 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4658 {
4659   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4660   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4661   PetscInt            *used_xadj,*used_adjncy;
4662   PetscBool           free_used_adj;
4663   PetscErrorCode      ierr;
4664 
4665   PetscFunctionBegin;
4666   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4667   free_used_adj = PETSC_FALSE;
4668   if (pcbddc->sub_schurs_layers == -1) {
4669     used_xadj = NULL;
4670     used_adjncy = NULL;
4671   } else {
4672     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4673       used_xadj = pcbddc->mat_graph->xadj;
4674       used_adjncy = pcbddc->mat_graph->adjncy;
4675     } else if (pcbddc->computed_rowadj) {
4676       used_xadj = pcbddc->mat_graph->xadj;
4677       used_adjncy = pcbddc->mat_graph->adjncy;
4678     } else {
4679       PetscBool      flg_row=PETSC_FALSE;
4680       const PetscInt *xadj,*adjncy;
4681       PetscInt       nvtxs;
4682 
4683       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4684       if (flg_row) {
4685         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4686         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4687         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4688         free_used_adj = PETSC_TRUE;
4689       } else {
4690         pcbddc->sub_schurs_layers = -1;
4691         used_xadj = NULL;
4692         used_adjncy = NULL;
4693       }
4694       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4695     }
4696   }
4697   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4698 
4699   /* free adjacency */
4700   if (free_used_adj) {
4701     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4702   }
4703   PetscFunctionReturn(0);
4704 }
4705 
4706 #undef __FUNCT__
4707 #define __FUNCT__ "PCBDDCInitSubSchurs"
4708 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4709 {
4710   PC_IS               *pcis=(PC_IS*)pc->data;
4711   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4712   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4713   PCBDDCGraph         graph;
4714   Mat                 S_j;
4715   PetscErrorCode      ierr;
4716 
4717   PetscFunctionBegin;
4718   /* attach interface graph for determining subsets */
4719   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4720     IS verticesIS;
4721 
4722     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4723     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4724     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4725     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4726     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4727     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4728 /*
4729     if (pcbddc->dbg_flag) {
4730       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4731     }
4732 */
4733   } else {
4734     graph = pcbddc->mat_graph;
4735   }
4736 
4737   /* Create Schur complement matrix */
4738   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4739   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4740 
4741   /* sub_schurs init */
4742   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4743   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4744   /* free graph struct */
4745   if (pcbddc->sub_schurs_rebuild) {
4746     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4747   }
4748   PetscFunctionReturn(0);
4749 }
4750