xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8a0068c30a1d3351f82f0889d1ab193c849df9bd)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1774   PetscBool         boolforchange,qr_needed;
1775   PetscBT           touched,change_basis,qr_needed_idx;
1776   /* auxiliary stuff */
1777   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1778   PetscInt          ncc;
1779   /* some quantities */
1780   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1781   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1782 
1783   PetscFunctionBegin;
1784   /* Destroy Mat objects computed previously */
1785   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1786   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1787   /* save info on constraints from previous setup (if any) */
1788   olocal_primal_size = pcbddc->local_primal_size;
1789   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1790   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1791   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1792 
1793   /* print some info */
1794   if (pcbddc->dbg_flag) {
1795     IS       vertices;
1796     PetscInt nv,nedges,nfaces;
1797     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1798     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1799     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1805     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1806   }
1807 
1808   if (!pcbddc->adaptive_selection) {
1809     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1810     MatNullSpace nearnullsp;
1811     const Vec    *nearnullvecs;
1812     Vec          *localnearnullsp;
1813     PetscScalar  *array;
1814     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1815     PetscBool    nnsp_has_cnst;
1816     /* LAPACK working arrays for SVD or POD */
1817     PetscBool    skip_lapack;
1818     PetscScalar  *work;
1819     PetscReal    *singular_vals;
1820 #if defined(PETSC_USE_COMPLEX)
1821     PetscReal    *rwork;
1822 #endif
1823 #if defined(PETSC_MISSING_LAPACK_GESVD)
1824     PetscScalar  *temp_basis,*correlation_mat;
1825 #else
1826     PetscBLASInt dummy_int=1;
1827     PetscScalar  dummy_scalar=1.;
1828 #endif
1829 
1830     /* Get index sets for faces, edges and vertices from graph */
1831     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1832     /* free unneeded index sets */
1833     if (!pcbddc->use_vertices) {
1834       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1835     }
1836     if (!pcbddc->use_edges) {
1837       for (i=0;i<n_ISForEdges;i++) {
1838         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1839       }
1840       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1841       n_ISForEdges = 0;
1842     }
1843     if (!pcbddc->use_faces) {
1844       for (i=0;i<n_ISForFaces;i++) {
1845         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1846       }
1847       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1848       n_ISForFaces = 0;
1849     }
1850     /* HACKS (the following two blocks of code) */
1851     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1852       pcbddc->use_change_of_basis = PETSC_TRUE;
1853       if (!ISForEdges) {
1854         pcbddc->use_change_on_faces = PETSC_TRUE;
1855       }
1856     }
1857     if (pcbddc->NullSpace) {
1858       /* use_change_of_basis should be consistent among processors */
1859       PetscBool tbool[2],gbool[2];
1860       tbool[0] = pcbddc->use_change_of_basis;
1861       tbool[1] = pcbddc->use_change_on_faces;
1862       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1863       pcbddc->use_change_of_basis = gbool[0];
1864       pcbddc->use_change_on_faces = gbool[1];
1865     }
1866 
1867     /* check if near null space is attached to global mat */
1868     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1869     if (nearnullsp) {
1870       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1871       /* remove any stored info */
1872       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1873       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1874       /* store information for BDDC solver reuse */
1875       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1876       pcbddc->onearnullspace = nearnullsp;
1877       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1878       for (i=0;i<nnsp_size;i++) {
1879         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1880       }
1881     } else { /* if near null space is not provided BDDC uses constants by default */
1882       nnsp_size = 0;
1883       nnsp_has_cnst = PETSC_TRUE;
1884     }
1885     /* get max number of constraints on a single cc */
1886     max_constraints = nnsp_size;
1887     if (nnsp_has_cnst) max_constraints++;
1888 
1889     /*
1890          Evaluate maximum storage size needed by the procedure
1891          - temp_indices will contain start index of each constraint stored as follows
1892          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1893          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1894          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1895                                                                                                                                                            */
1896     total_counts = n_ISForFaces+n_ISForEdges;
1897     total_counts *= max_constraints;
1898     n_vertices = 0;
1899     if (ISForVertices) {
1900       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1901     }
1902     total_counts += n_vertices;
1903     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1904     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1905     total_counts = 0;
1906     max_size_of_constraint = 0;
1907     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1908       if (i<n_ISForEdges) {
1909         used_IS = &ISForEdges[i];
1910       } else {
1911         used_IS = &ISForFaces[i-n_ISForEdges];
1912       }
1913       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1914       total_counts += j;
1915       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1916     }
1917     total_counts *= max_constraints;
1918     total_counts += n_vertices;
1919     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1920     /* get local part of global near null space vectors */
1921     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1922     for (k=0;k<nnsp_size;k++) {
1923       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1924       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1925       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1926     }
1927 
1928     /* whether or not to skip lapack calls */
1929     skip_lapack = PETSC_TRUE;
1930     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1931 
1932     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1933     if (!skip_lapack) {
1934       PetscScalar temp_work;
1935 
1936 #if defined(PETSC_MISSING_LAPACK_GESVD)
1937       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1938       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1939       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1940       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1941 #if defined(PETSC_USE_COMPLEX)
1942       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1943 #endif
1944       /* now we evaluate the optimal workspace using query with lwork=-1 */
1945       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1946       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1947       lwork = -1;
1948       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1949 #if !defined(PETSC_USE_COMPLEX)
1950       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1951 #else
1952       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1953 #endif
1954       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1955       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1956 #else /* on missing GESVD */
1957       /* SVD */
1958       PetscInt max_n,min_n;
1959       max_n = max_size_of_constraint;
1960       min_n = max_constraints;
1961       if (max_size_of_constraint < max_constraints) {
1962         min_n = max_size_of_constraint;
1963         max_n = max_constraints;
1964       }
1965       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1966 #if defined(PETSC_USE_COMPLEX)
1967       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1968 #endif
1969       /* now we evaluate the optimal workspace using query with lwork=-1 */
1970       lwork = -1;
1971       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1972       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1973       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1974       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1975 #if !defined(PETSC_USE_COMPLEX)
1976       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1977 #else
1978       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1979 #endif
1980       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1981       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1982 #endif /* on missing GESVD */
1983       /* Allocate optimal workspace */
1984       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1985       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1986     }
1987     /* Now we can loop on constraining sets */
1988     total_counts = 0;
1989     temp_indices[0] = 0;
1990     /* vertices */
1991     if (ISForVertices) {
1992       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1993       if (nnsp_has_cnst) { /* consider all vertices */
1994         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1995         for (i=0;i<n_vertices;i++) {
1996           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1997           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1998           total_counts++;
1999         }
2000       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2001         PetscBool used_vertex;
2002         for (i=0;i<n_vertices;i++) {
2003           used_vertex = PETSC_FALSE;
2004           k = 0;
2005           while (!used_vertex && k<nnsp_size) {
2006             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2007             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2008               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2009               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2010               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2011               total_counts++;
2012               used_vertex = PETSC_TRUE;
2013             }
2014             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2015             k++;
2016           }
2017         }
2018       }
2019       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2020       n_vertices = total_counts;
2021     }
2022 
2023     /* edges and faces */
2024     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2025       if (ncc<n_ISForEdges) {
2026         used_IS = &ISForEdges[ncc];
2027         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2028       } else {
2029         used_IS = &ISForFaces[ncc-n_ISForEdges];
2030         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2031       }
2032       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2033       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2034       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2035       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2036       /* change of basis should not be performed on local periodic nodes */
2037       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2038       if (nnsp_has_cnst) {
2039         PetscScalar quad_value;
2040         temp_constraints++;
2041         if (!pcbddc->use_nnsp_true) {
2042           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2043         } else {
2044           quad_value = 1.0;
2045         }
2046         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2047         for (j=0;j<size_of_constraint;j++) {
2048           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2049         }
2050         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2051         total_counts++;
2052       }
2053       for (k=0;k<nnsp_size;k++) {
2054         PetscReal real_value;
2055         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2056         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2057         for (j=0;j<size_of_constraint;j++) {
2058           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2059         }
2060         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2061         /* check if array is null on the connected component */
2062         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2063         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2064         if (real_value > 0.0) { /* keep indices and values */
2065           temp_constraints++;
2066           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2067           total_counts++;
2068         }
2069       }
2070       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2071       valid_constraints = temp_constraints;
2072       if (!pcbddc->use_nnsp_true && temp_constraints) {
2073         if (temp_constraints == 1) { /* just normalize the constraint */
2074           PetscScalar norm;
2075           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2076           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2077           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2078           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2079         } else { /* perform SVD */
2080           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2081 
2082 #if defined(PETSC_MISSING_LAPACK_GESVD)
2083           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2084              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2085              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2086                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2087                 from that computed using LAPACKgesvd
2088              -> This is due to a different computation of eigenvectors in LAPACKheev
2089              -> The quality of the POD-computed basis will be the same */
2090           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2091           /* Store upper triangular part of correlation matrix */
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2093           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094           for (j=0;j<temp_constraints;j++) {
2095             for (k=0;k<j+1;k++) {
2096               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2097             }
2098           }
2099           /* compute eigenvalues and eigenvectors of correlation matrix */
2100           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2101           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2102 #if !defined(PETSC_USE_COMPLEX)
2103           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2104 #else
2105           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2106 #endif
2107           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2108           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2109           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2110           j = 0;
2111           while (j < temp_constraints && singular_vals[j] < tol) j++;
2112           total_counts = total_counts-j;
2113           valid_constraints = temp_constraints-j;
2114           /* scale and copy POD basis into used quadrature memory */
2115           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2116           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2117           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2119           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2120           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2121           if (j<temp_constraints) {
2122             PetscInt ii;
2123             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2124             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2125             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2126             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2127             for (k=0;k<temp_constraints-j;k++) {
2128               for (ii=0;ii<size_of_constraint;ii++) {
2129                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2130               }
2131             }
2132           }
2133 #else  /* on missing GESVD */
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2137           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2138 #if !defined(PETSC_USE_COMPLEX)
2139           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2140 #else
2141           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2142 #endif
2143           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2144           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2145           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2146           k = temp_constraints;
2147           if (k > size_of_constraint) k = size_of_constraint;
2148           j = 0;
2149           while (j < k && singular_vals[k-j-1] < tol) j++;
2150           valid_constraints = k-j;
2151           total_counts = total_counts-temp_constraints+valid_constraints;
2152 #endif /* on missing GESVD */
2153         }
2154       }
2155       /* setting change_of_basis flag is safe now */
2156       if (boolforchange) {
2157         for (j=0;j<valid_constraints;j++) {
2158           PetscBTSet(change_basis,total_counts-j-1);
2159         }
2160       }
2161     }
2162     /* free workspace */
2163     if (!skip_lapack) {
2164       ierr = PetscFree(work);CHKERRQ(ierr);
2165 #if defined(PETSC_USE_COMPLEX)
2166       ierr = PetscFree(rwork);CHKERRQ(ierr);
2167 #endif
2168       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2169 #if defined(PETSC_MISSING_LAPACK_GESVD)
2170       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2171       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2172 #endif
2173     }
2174     for (k=0;k<nnsp_size;k++) {
2175       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2176     }
2177     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2178     /* free index sets of faces, edges and vertices */
2179     for (i=0;i<n_ISForFaces;i++) {
2180       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2181     }
2182     if (n_ISForFaces) {
2183       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2184     }
2185     for (i=0;i<n_ISForEdges;i++) {
2186       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2187     }
2188     if (n_ISForEdges) {
2189       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2190     }
2191     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2192   } else {
2193     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2194     PetscInt        cum = 0;
2195 
2196     total_counts = 0;
2197     n_vertices = 0;
2198     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2199       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2200     }
2201     max_constraints = 0;
2202     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2203       total_counts += pcbddc->adaptive_constraints_n[i];
2204       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2205     }
2206     temp_indices = pcbddc->adaptive_constraints_ptrs;
2207     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2208     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2209 
2210 #if 0
2211     printf("Found %d totals\n",total_counts);
2212     for (i=0;i<total_counts;i++) {
2213       printf("const %d, start %d",i,temp_indices[i]);
2214       printf(" end %d:\n",temp_indices[i+1]);
2215       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2216         printf("  idxs %d",temp_indices_to_constraint[j]);
2217         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2218       }
2219     }
2220     for (i=0;i<n_vertices;i++) {
2221       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2222     }
2223     for (i=0;i<sub_schurs->n_subs;i++) {
2224       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2225     }
2226 #endif
2227 
2228     max_size_of_constraint = 0;
2229     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2230     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2231     /* Change of basis */
2232     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2233     if (pcbddc->use_change_of_basis) {
2234       cum = n_vertices;
2235       for (i=0;i<sub_schurs->n_subs;i++) {
2236         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2237           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2238             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2239           }
2240         }
2241         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2242       }
2243     }
2244   }
2245 
2246   /* map temp_indices_to_constraint in boundary numbering */
2247   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2248   if (i != temp_indices[total_counts]) {
2249     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2250   }
2251 
2252   /* set quantities in pcbddc data structure and store previous primal size */
2253   /* n_vertices defines the number of subdomain corners in the primal space */
2254   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2255   pcbddc->local_primal_size = total_counts;
2256   pcbddc->n_vertices = n_vertices;
2257   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2258 
2259   /* Create constraint matrix */
2260   /* The constraint matrix is used to compute the l2g map of primal dofs */
2261   /* so we need to set it up properly either with or without change of basis */
2262   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2263   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2264   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2265   /* array to compute a local numbering of constraints : vertices first then constraints */
2266   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2267   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2268   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2269   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2270   /* auxiliary stuff for basis change */
2271   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2272   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2273 
2274   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2275   total_primal_vertices=0;
2276   for (i=0;i<pcbddc->local_primal_size;i++) {
2277     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2278     if (size_of_constraint == 1) {
2279       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2280       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2281       aux_primal_minloc[total_primal_vertices]=0;
2282       total_primal_vertices++;
2283     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2284       PetscInt min_loc,min_index;
2285       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2286       /* find first untouched local node */
2287       k = 0;
2288       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2289       min_index = global_indices[k];
2290       min_loc = k;
2291       /* search the minimum among global nodes already untouched on the cc */
2292       for (k=1;k<size_of_constraint;k++) {
2293         /* there can be more than one constraint on a single connected component */
2294         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2295           min_index = global_indices[k];
2296           min_loc = k;
2297         }
2298       }
2299       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2300       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2301       aux_primal_minloc[total_primal_vertices]=min_loc;
2302       total_primal_vertices++;
2303     }
2304   }
2305   /* determine if a QR strategy is needed for change of basis */
2306   qr_needed = PETSC_FALSE;
2307   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2308   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2309     if (PetscBTLookup(change_basis,i)) {
2310       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2311         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2312         j = 0;
2313         for (k=0;k<size_of_constraint;k++) {
2314           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2315             j++;
2316           }
2317         }
2318         /* found more than one primal dof on the cc */
2319         if (j > 1) {
2320           PetscBTSet(qr_needed_idx,i);
2321           qr_needed = PETSC_TRUE;
2322         }
2323       } else {
2324         PetscBTSet(qr_needed_idx,i);
2325         qr_needed = PETSC_TRUE;
2326       }
2327     }
2328   }
2329   total_counts = total_primal_vertices;
2330   for (i=0;i<pcbddc->local_primal_size;i++) {
2331     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2332     if (size_of_constraint != 1 && !PetscBTLookup(change_basis,i)) {
2333       PetscInt min_loc,min_index;
2334       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2335       /* find first untouched local node */
2336       k = 0;
2337       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2338       min_index = global_indices[k];
2339       min_loc = k;
2340       /* search the minimum among global nodes already untouched on the cc */
2341       for (k=1;k<size_of_constraint;k++) {
2342         /* there can be more than one constraint on a single connected component */
2343         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2344           min_index = global_indices[k];
2345           min_loc = k;
2346         }
2347       }
2348       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2349       aux_primal_numbering[total_counts]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2350       aux_primal_minloc[total_counts]=min_loc;
2351       total_counts++;
2352     }
2353   }
2354   /* free workspace */
2355   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2356 
2357   /* permute indices in order to have a sorted set of vertices */
2358   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2359 
2360   /* get reference dofs for local primal dofs */
2361   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2362   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,pcbddc->local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2363   pcbddc->n_actual_vertices = total_primal_vertices;
2364 
2365   /* nonzero structure of constraint matrix */
2366   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2367   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2368   j=total_primal_vertices;
2369   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2370     if (!PetscBTLookup(change_basis,i)) {
2371       nnz[j]=temp_indices[i+1]-temp_indices[i];
2372       j++;
2373     }
2374   }
2375   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2376   ierr = PetscFree(nnz);CHKERRQ(ierr);
2377 
2378   /* set values in constraint matrix */
2379   for (i=0;i<total_primal_vertices;i++) {
2380     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2381   }
2382   total_counts = total_primal_vertices;
2383   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2384     if (!PetscBTLookup(change_basis,i)) {
2385       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2386       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2387       total_counts++;
2388     }
2389   }
2390   /* assembling */
2391   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393 
2394   /*
2395   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2396   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2397   */
2398   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2399   if (pcbddc->use_change_of_basis) {
2400     /* dual and primal dofs on a single cc */
2401     PetscInt     dual_dofs,primal_dofs;
2402     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2403     PetscInt     primal_counter;
2404     /* working stuff for GEQRF */
2405     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2406     PetscBLASInt lqr_work;
2407     /* working stuff for UNGQR */
2408     PetscScalar  *gqr_work,lgqr_work_t;
2409     PetscBLASInt lgqr_work;
2410     /* working stuff for TRTRS */
2411     PetscScalar  *trs_rhs;
2412     PetscBLASInt Blas_NRHS;
2413     /* pointers for values insertion into change of basis matrix */
2414     PetscInt     *start_rows,*start_cols;
2415     PetscScalar  *start_vals;
2416     /* working stuff for values insertion */
2417     PetscBT      is_primal;
2418     /* matrix sizes */
2419     PetscInt     global_size,local_size;
2420     /* temporary change of basis */
2421     Mat          localChangeOfBasisMatrix;
2422     /* extra space for debugging */
2423     PetscScalar  *dbg_work;
2424 
2425     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2426     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2427     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2428     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2429     /* nonzeros for local mat */
2430     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2431     for (i=0;i<pcis->n;i++) nnz[i]=1;
2432     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2433       if (PetscBTLookup(change_basis,i)) {
2434         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2435         if (PetscBTLookup(qr_needed_idx,i)) {
2436           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2437         } else {
2438           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2439           /* get local primal index on the cc */
2440           j = 0;
2441           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2442           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2443         }
2444       }
2445     }
2446     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2447     ierr = PetscFree(nnz);CHKERRQ(ierr);
2448     /* Set initial identity in the matrix */
2449     for (i=0;i<pcis->n;i++) {
2450       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2451     }
2452 
2453     if (pcbddc->dbg_flag) {
2454       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2455       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2456     }
2457 
2458 
2459     /* Now we loop on the constraints which need a change of basis */
2460     /*
2461        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2462        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2463 
2464        Basic blocks of change of basis matrix T computed by
2465 
2466           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2467 
2468             | 1        0   ...        0         s_1/S |
2469             | 0        1   ...        0         s_2/S |
2470             |              ...                        |
2471             | 0        ...            1     s_{n-1}/S |
2472             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2473 
2474             with S = \sum_{i=1}^n s_i^2
2475             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2476                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2477 
2478           - QR decomposition of constraints otherwise
2479     */
2480     if (qr_needed) {
2481       /* space to store Q */
2482       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2483       /* first we issue queries for optimal work */
2484       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2485       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2486       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2487       lqr_work = -1;
2488       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2489       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2490       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2491       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2492       lgqr_work = -1;
2493       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2494       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2495       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2496       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2497       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2498       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2499       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2500       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2501       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2502       /* array to store scaling factors for reflectors */
2503       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2504       /* array to store rhs and solution of triangular solver */
2505       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2506       /* allocating workspace for check */
2507       if (pcbddc->dbg_flag) {
2508         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2509       }
2510     }
2511     /* array to store whether a node is primal or not */
2512     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2513     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2514     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2515     if (i != total_primal_vertices) {
2516       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2517     }
2518     for (i=0;i<total_primal_vertices;i++) {
2519       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2520     }
2521     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2522 
2523     /* loop on constraints and see whether or not they need a change of basis and compute it */
2524     /* -> using implicit ordering contained in temp_indices data */
2525     total_counts = pcbddc->n_vertices;
2526     primal_counter = total_counts;
2527     while (total_counts<pcbddc->local_primal_size) {
2528       primal_dofs = 1;
2529       if (PetscBTLookup(change_basis,total_counts)) {
2530         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2531         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2532           primal_dofs++;
2533         }
2534         /* get constraint info */
2535         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2536         dual_dofs = size_of_constraint-primal_dofs;
2537 
2538         if (pcbddc->dbg_flag) {
2539           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2540         }
2541 
2542         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2543 
2544           /* copy quadrature constraints for change of basis check */
2545           if (pcbddc->dbg_flag) {
2546             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2547           }
2548           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2549           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2550 
2551           /* compute QR decomposition of constraints */
2552           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2553           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2554           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2555           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2556           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2557           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2558           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2559 
2560           /* explictly compute R^-T */
2561           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2562           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2563           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2564           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2565           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2566           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2567           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2568           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2569           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2570           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2571 
2572           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2573           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2576           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2577           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2578           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2579           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2580           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2581 
2582           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2583              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2584              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2585           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2586           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2587           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2588           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2589           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2590           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2591           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2592           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2593           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2594           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2595 
2596           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2597           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2598           /* insert cols for primal dofs */
2599           for (j=0;j<primal_dofs;j++) {
2600             start_vals = &qr_basis[j*size_of_constraint];
2601             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2602             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2603           }
2604           /* insert cols for dual dofs */
2605           for (j=0,k=0;j<dual_dofs;k++) {
2606             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2607               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2608               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2609               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2610               j++;
2611             }
2612           }
2613 
2614           /* check change of basis */
2615           if (pcbddc->dbg_flag) {
2616             PetscInt   ii,jj;
2617             PetscBool valid_qr=PETSC_TRUE;
2618             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2619             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2620             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2621             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2622             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2623             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2624             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2625             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2626             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2627             for (jj=0;jj<size_of_constraint;jj++) {
2628               for (ii=0;ii<primal_dofs;ii++) {
2629                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2630                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2631               }
2632             }
2633             if (!valid_qr) {
2634               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2635               for (jj=0;jj<size_of_constraint;jj++) {
2636                 for (ii=0;ii<primal_dofs;ii++) {
2637                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2638                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2639                   }
2640                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2641                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2642                   }
2643                 }
2644               }
2645             } else {
2646               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2647             }
2648           }
2649         } else { /* simple transformation block */
2650           PetscInt    row,col;
2651           PetscScalar val,norm;
2652 
2653           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2654           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2655           for (j=0;j<size_of_constraint;j++) {
2656             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2657             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2658             if (!PetscBTLookup(is_primal,row_B)) {
2659               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2660               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2661               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2662             } else {
2663               for (k=0;k<size_of_constraint;k++) {
2664                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2665                 if (row != col) {
2666                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2667                 } else {
2668                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2669                 }
2670                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2671               }
2672             }
2673           }
2674           if (pcbddc->dbg_flag) {
2675             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2676           }
2677         }
2678         /* increment primal counter */
2679         primal_counter += primal_dofs;
2680       } else {
2681         if (pcbddc->dbg_flag) {
2682           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2683         }
2684       }
2685       /* increment constraint counter total_counts */
2686       total_counts += primal_dofs;
2687     }
2688 
2689     /* free workspace */
2690     if (qr_needed) {
2691       if (pcbddc->dbg_flag) {
2692         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2693       }
2694       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2695       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2696       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2697       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2698       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2699     }
2700     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2701     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703 
2704     /* assembling of global change of variable */
2705     {
2706       Mat      tmat;
2707       PetscInt bs;
2708 
2709       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2710       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2711       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2712       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2713       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2714       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2715       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2716       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2717       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2718       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2719       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2720       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2721       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2722       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2723       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2724       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2726       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2727     }
2728     /* check */
2729     if (pcbddc->dbg_flag) {
2730       PetscReal error;
2731       Vec       x,x_change;
2732 
2733       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2734       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2735       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2736       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2737       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2738       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2739       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2740       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2741       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2742       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2743       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2744       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2745       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2746       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2747       ierr = VecDestroy(&x);CHKERRQ(ierr);
2748       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2749     }
2750 
2751     /* adapt sub_schurs computed (if any) */
2752     if (pcbddc->use_deluxe_scaling) {
2753       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2754       if (sub_schurs->S_Ej_all) {
2755         Mat S_new,tmat;
2756         IS is_all_N;
2757 
2758         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2759         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2760         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2761         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2762         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2763         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2764         sub_schurs->S_Ej_all = S_new;
2765         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2766         if (sub_schurs->sum_S_Ej_all) {
2767           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2768           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2769           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2770           sub_schurs->sum_S_Ej_all = S_new;
2771           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2772         }
2773         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2774       }
2775     }
2776     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2777   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2778     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2779     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2780   }
2781 
2782   /* set up change of basis context */
2783   if (pcbddc->ChangeOfBasisMatrix) {
2784     PCBDDCChange_ctx change_ctx;
2785 
2786     if (!pcbddc->new_global_mat) {
2787       PetscInt global_size,local_size;
2788 
2789       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2790       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2791       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2792       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2793       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2794       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2795       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2796       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2797       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2798     } else {
2799       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2800       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2801       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2802     }
2803     if (!pcbddc->user_ChangeOfBasisMatrix) {
2804       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2805       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2806     } else {
2807       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2808       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2809     }
2810     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2811     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2812     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2814   }
2815 
2816   /* get indices in local ordering for vertices and constraints */
2817   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2818 
2819   /* check if a new primal space has been introduced */
2820   pcbddc->new_primal_space_local = PETSC_TRUE;
2821   if (olocal_primal_size == pcbddc->local_primal_size) {
2822     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2823     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2824   }
2825   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2826   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2827   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2828 
2829   /* flush dbg viewer */
2830   if (pcbddc->dbg_flag) {
2831     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2832   }
2833 
2834   /* free workspace */
2835   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2836   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2837   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2838   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2839   if (!pcbddc->adaptive_selection) {
2840     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2841     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2842   } else {
2843     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2844                       pcbddc->adaptive_constraints_ptrs,
2845                       pcbddc->adaptive_constraints_idxs,
2846                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2847     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2854 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2855 {
2856   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2857   PC_IS       *pcis = (PC_IS*)pc->data;
2858   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2859   PetscInt    ierr,i,vertex_size,N;
2860   PetscViewer viewer=pcbddc->dbg_viewer;
2861 
2862   PetscFunctionBegin;
2863   /* Reset previously computed graph */
2864   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2865   /* Init local Graph struct */
2866   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2867   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2868 
2869   /* Check validity of the csr graph passed in by the user */
2870   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2871     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2872   }
2873 
2874   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2875   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2876     PetscInt  *xadj,*adjncy;
2877     PetscInt  nvtxs;
2878     PetscBool flg_row=PETSC_FALSE;
2879 
2880     if (pcbddc->use_local_adj) {
2881 
2882       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2883       if (flg_row) {
2884         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2885         pcbddc->computed_rowadj = PETSC_TRUE;
2886       }
2887       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2888     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2889       IS                     is_dummy;
2890       ISLocalToGlobalMapping l2gmap_dummy;
2891       PetscInt               j,sum;
2892       PetscInt               *cxadj,*cadjncy;
2893       const PetscInt         *idxs;
2894       PCBDDCGraph            graph;
2895       PetscBT                is_on_boundary;
2896 
2897       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2898       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2899       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2900       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2901       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2902       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2903       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2904       if (flg_row) {
2905         graph->xadj = xadj;
2906         graph->adjncy = adjncy;
2907       }
2908       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2909       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2910       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2911 
2912       if (pcbddc->dbg_flag) {
2913         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2914         for (i=0;i<graph->ncc;i++) {
2915           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2916         }
2917       }
2918 
2919       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2920       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2921       for (i=0;i<pcis->n_B;i++) {
2922         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2923       }
2924       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2925 
2926       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2927       sum = 0;
2928       for (i=0;i<graph->ncc;i++) {
2929         PetscInt sizecc = 0;
2930         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2931           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2932             sizecc++;
2933           }
2934         }
2935         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2936           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2937             cxadj[graph->queue[j]] = sizecc;
2938           }
2939         }
2940         sum += sizecc*sizecc;
2941       }
2942       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2943       sum = 0;
2944       for (i=0;i<pcis->n;i++) {
2945         PetscInt temp = cxadj[i];
2946         cxadj[i] = sum;
2947         sum += temp;
2948       }
2949       cxadj[pcis->n] = sum;
2950       for (i=0;i<graph->ncc;i++) {
2951         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2952           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2953             PetscInt k,sizecc = 0;
2954             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2955               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2956                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2957                 sizecc++;
2958               }
2959             }
2960           }
2961         }
2962       }
2963       if (sum) {
2964         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2965       } else {
2966         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2967         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2968       }
2969       graph->xadj = 0;
2970       graph->adjncy = 0;
2971       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2972       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2973     }
2974   }
2975   if (pcbddc->dbg_flag) {
2976     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2977   }
2978 
2979   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2980   vertex_size = 1;
2981   if (pcbddc->user_provided_isfordofs) {
2982     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2983       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2984       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2985         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2986         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2987       }
2988       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2989       pcbddc->n_ISForDofs = 0;
2990       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2991     }
2992     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2993     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2994   } else {
2995     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2996       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2997       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2998       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2999         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3000       }
3001     }
3002   }
3003 
3004   /* Setup of Graph */
3005   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3006     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3007   }
3008   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3009     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3010   }
3011   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3012 
3013   /* Graph's connected components analysis */
3014   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3015 
3016   /* print some info to stdout */
3017   if (pcbddc->dbg_flag) {
3018     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3019   }
3020 
3021   /* mark topography has done */
3022   pcbddc->recompute_topography = PETSC_FALSE;
3023   PetscFunctionReturn(0);
3024 }
3025 
3026 #undef __FUNCT__
3027 #define __FUNCT__ "PCBDDCSubsetNumbering"
3028 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3029 {
3030   Vec            local_vec,global_vec;
3031   IS             seqis,paris;
3032   VecScatter     scatter_ctx;
3033   PetscScalar    *array;
3034   PetscInt       *temp_global_dofs;
3035   PetscScalar    globalsum;
3036   PetscInt       i,j,s;
3037   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3038   PetscMPIInt    rank_prec_comm,size_prec_comm;
3039   PetscInt       *dof_sizes,*dof_displs;
3040   PetscBool      first_found;
3041   PetscErrorCode ierr;
3042 
3043   PetscFunctionBegin;
3044   /* mpi buffers */
3045   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3046   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3047   j = ( !rank_prec_comm ? size_prec_comm : 0);
3048   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3049   /* get maximum size of subset */
3050   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3051   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3052   max_local = 0;
3053   for (i=0;i<n_local_dofs;i++) {
3054     if (max_local < temp_global_dofs[i] ) {
3055       max_local = temp_global_dofs[i];
3056     }
3057   }
3058   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3059   max_global++;
3060   max_local = 0;
3061   for (i=0;i<n_local_dofs;i++) {
3062     if (max_local < local_dofs[i] ) {
3063       max_local = local_dofs[i];
3064     }
3065   }
3066   max_local++;
3067   /* allocate workspace */
3068   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3069   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3070   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3071   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3072   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3073   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3074   /* create scatter */
3075   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3076   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3077   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3078   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3079   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3080   /* init array */
3081   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3082   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3083   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3084   if (local_dofs_mult) {
3085     for (i=0;i<n_local_dofs;i++) {
3086       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3087     }
3088   } else {
3089     for (i=0;i<n_local_dofs;i++) {
3090       array[local_dofs[i]]=1.0;
3091     }
3092   }
3093   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3094   /* scatter into global vec and get total number of global dofs */
3095   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3096   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3098   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3099   /* Fill global_vec with cumulative function for global numbering */
3100   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3101   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3102   nlocals = 0;
3103   first_index = -1;
3104   first_found = PETSC_FALSE;
3105   for (i=0;i<s;i++) {
3106     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3107       first_found = PETSC_TRUE;
3108       first_index = i;
3109     }
3110     nlocals += (PetscInt)PetscRealPart(array[i]);
3111   }
3112   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3113   if (!rank_prec_comm) {
3114     dof_displs[0]=0;
3115     for (i=1;i<size_prec_comm;i++) {
3116       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3117     }
3118   }
3119   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3120   if (first_found) {
3121     array[first_index] += (PetscScalar)nlocals;
3122     old_index = first_index;
3123     for (i=first_index+1;i<s;i++) {
3124       if (PetscRealPart(array[i]) > 0.1) {
3125         array[i] += array[old_index];
3126         old_index = i;
3127       }
3128     }
3129   }
3130   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3131   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3132   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3133   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3134   /* get global ordering of local dofs */
3135   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3136   if (local_dofs_mult) {
3137     for (i=0;i<n_local_dofs;i++) {
3138       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3139     }
3140   } else {
3141     for (i=0;i<n_local_dofs;i++) {
3142       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3143     }
3144   }
3145   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3146   /* free workspace */
3147   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3148   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3149   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3150   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3151   /* return pointer to global ordering of local dofs */
3152   *global_numbering_subset = temp_global_dofs;
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3158 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3159 {
3160   PetscInt       i,j;
3161   PetscScalar    *alphas;
3162   PetscErrorCode ierr;
3163 
3164   PetscFunctionBegin;
3165   /* this implements stabilized Gram-Schmidt */
3166   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3167   for (i=0;i<n;i++) {
3168     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3169     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3170     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3171   }
3172   ierr = PetscFree(alphas);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatISGetSubassemblingPattern"
3178 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3179 {
3180   Mat             subdomain_adj;
3181   IS              new_ranks,ranks_send_to;
3182   MatPartitioning partitioner;
3183   Mat_IS          *matis;
3184   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3185   PetscInt        prank;
3186   PetscMPIInt     size,rank,color;
3187   PetscInt        *xadj,*adjncy,*oldranks;
3188   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3189   PetscInt        i,local_size,threshold=0;
3190   PetscErrorCode  ierr;
3191   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3192   PetscSubcomm    subcomm;
3193 
3194   PetscFunctionBegin;
3195   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3196   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3197   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3198 
3199   /* Get info on mapping */
3200   matis = (Mat_IS*)(mat->data);
3201   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3202   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3203 
3204   /* build local CSR graph of subdomains' connectivity */
3205   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3206   xadj[0] = 0;
3207   xadj[1] = PetscMax(n_neighs-1,0);
3208   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3209   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3210 
3211   if (threshold) {
3212     PetscInt xadj_count = 0;
3213     for (i=1;i<n_neighs;i++) {
3214       if (n_shared[i] > threshold) {
3215         adjncy[xadj_count] = neighs[i];
3216         adjncy_wgt[xadj_count] = n_shared[i];
3217         xadj_count++;
3218       }
3219     }
3220     xadj[1] = xadj_count;
3221   } else {
3222     if (xadj[1]) {
3223       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3224       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3225     }
3226   }
3227   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3228   if (use_square) {
3229     for (i=0;i<xadj[1];i++) {
3230       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3231     }
3232   }
3233   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3234 
3235   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3236 
3237   /*
3238     Restrict work on active processes only.
3239   */
3240   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3241   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3242   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3243   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3244   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3245   if (color) {
3246     ierr = PetscFree(xadj);CHKERRQ(ierr);
3247     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3248     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3249   } else {
3250     PetscInt coarsening_ratio;
3251     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3252     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3253     prank = rank;
3254     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3255     /*
3256     for (i=0;i<size;i++) {
3257       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3258     }
3259     */
3260     for (i=0;i<xadj[1];i++) {
3261       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3262     }
3263     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3264     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3265     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3266 
3267     /* Partition */
3268     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3269     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3270     if (use_vwgt) {
3271       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3272       v_wgt[0] = local_size;
3273       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3274     }
3275     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3276     coarsening_ratio = size/n_subdomains;
3277     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3278     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3279     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3280     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3281 
3282     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3283     if (contiguous) {
3284       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3285     } else {
3286       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3287     }
3288     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3289     /* clean up */
3290     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3291     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3292     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3293     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3294   }
3295   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3296 
3297   /* assemble parallel IS for sends */
3298   i = 1;
3299   if (color) i=0;
3300   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3301 
3302   /* get back IS */
3303   *is_sends = ranks_send_to;
3304   PetscFunctionReturn(0);
3305 }
3306 
3307 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatISSubassemble"
3311 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3312 {
3313   Mat                    local_mat;
3314   Mat_IS                 *matis;
3315   IS                     is_sends_internal;
3316   PetscInt               rows,cols,new_local_rows;
3317   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3318   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3319   ISLocalToGlobalMapping l2gmap;
3320   PetscInt*              l2gmap_indices;
3321   const PetscInt*        is_indices;
3322   MatType                new_local_type;
3323   /* buffers */
3324   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3325   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3326   PetscInt               *recv_buffer_idxs_local;
3327   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3328   /* MPI */
3329   MPI_Comm               comm,comm_n;
3330   PetscSubcomm           subcomm;
3331   PetscMPIInt            n_sends,n_recvs,commsize;
3332   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3333   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3334   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3335   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3336   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3337   PetscErrorCode         ierr;
3338 
3339   PetscFunctionBegin;
3340   /* TODO: add missing checks */
3341   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3342   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3343   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3344   PetscValidLogicalCollectiveInt(mat,nis,7);
3345   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3346   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3347   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3348   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3349   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3350   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3351   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3352   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3353     PetscInt mrows,mcols,mnrows,mncols;
3354     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3355     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3356     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3357     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3358     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3359     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3360   }
3361   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3362   PetscValidLogicalCollectiveInt(mat,bs,0);
3363   /* prepare IS for sending if not provided */
3364   if (!is_sends) {
3365     PetscBool pcontig = PETSC_TRUE;
3366     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3367     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3368   } else {
3369     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3370     is_sends_internal = is_sends;
3371   }
3372 
3373   /* get pointer of MATIS data */
3374   matis = (Mat_IS*)mat->data;
3375 
3376   /* get comm */
3377   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3378 
3379   /* compute number of sends */
3380   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3381   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3382 
3383   /* compute number of receives */
3384   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3385   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3386   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3387   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3388   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3389   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3390   ierr = PetscFree(iflags);CHKERRQ(ierr);
3391 
3392   /* restrict comm if requested */
3393   subcomm = 0;
3394   destroy_mat = PETSC_FALSE;
3395   if (restrict_comm) {
3396     PetscMPIInt color,subcommsize;
3397 
3398     color = 0;
3399     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3400     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3401     subcommsize = commsize - subcommsize;
3402     /* check if reuse has been requested */
3403     if (reuse == MAT_REUSE_MATRIX) {
3404       if (*mat_n) {
3405         PetscMPIInt subcommsize2;
3406         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3407         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3408         comm_n = PetscObjectComm((PetscObject)*mat_n);
3409       } else {
3410         comm_n = PETSC_COMM_SELF;
3411       }
3412     } else { /* MAT_INITIAL_MATRIX */
3413       PetscMPIInt rank;
3414 
3415       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3416       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3417       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3418       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3419       comm_n = PetscSubcommChild(subcomm);
3420     }
3421     /* flag to destroy *mat_n if not significative */
3422     if (color) destroy_mat = PETSC_TRUE;
3423   } else {
3424     comm_n = comm;
3425   }
3426 
3427   /* prepare send/receive buffers */
3428   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3429   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3430   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3431   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3432   if (nis) {
3433     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3434   }
3435 
3436   /* Get data from local matrices */
3437   if (!isdense) {
3438     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3439     /* TODO: See below some guidelines on how to prepare the local buffers */
3440     /*
3441        send_buffer_vals should contain the raw values of the local matrix
3442        send_buffer_idxs should contain:
3443        - MatType_PRIVATE type
3444        - PetscInt        size_of_l2gmap
3445        - PetscInt        global_row_indices[size_of_l2gmap]
3446        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3447     */
3448   } else {
3449     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3450     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3451     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3452     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3453     send_buffer_idxs[1] = i;
3454     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3455     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3456     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3457     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3458     for (i=0;i<n_sends;i++) {
3459       ilengths_vals[is_indices[i]] = len*len;
3460       ilengths_idxs[is_indices[i]] = len+2;
3461     }
3462   }
3463   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3464   /* additional is (if any) */
3465   if (nis) {
3466     PetscMPIInt psum;
3467     PetscInt j;
3468     for (j=0,psum=0;j<nis;j++) {
3469       PetscInt plen;
3470       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3471       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3472       psum += len+1; /* indices + lenght */
3473     }
3474     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3475     for (j=0,psum=0;j<nis;j++) {
3476       PetscInt plen;
3477       const PetscInt *is_array_idxs;
3478       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3479       send_buffer_idxs_is[psum] = plen;
3480       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3481       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3482       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3483       psum += plen+1; /* indices + lenght */
3484     }
3485     for (i=0;i<n_sends;i++) {
3486       ilengths_idxs_is[is_indices[i]] = psum;
3487     }
3488     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3489   }
3490 
3491   buf_size_idxs = 0;
3492   buf_size_vals = 0;
3493   buf_size_idxs_is = 0;
3494   for (i=0;i<n_recvs;i++) {
3495     buf_size_idxs += (PetscInt)olengths_idxs[i];
3496     buf_size_vals += (PetscInt)olengths_vals[i];
3497     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3498   }
3499   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3500   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3501   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3502 
3503   /* get new tags for clean communications */
3504   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3505   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3506   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3507 
3508   /* allocate for requests */
3509   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3510   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3511   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3512   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3513   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3514   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3515 
3516   /* communications */
3517   ptr_idxs = recv_buffer_idxs;
3518   ptr_vals = recv_buffer_vals;
3519   ptr_idxs_is = recv_buffer_idxs_is;
3520   for (i=0;i<n_recvs;i++) {
3521     source_dest = onodes[i];
3522     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3523     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3524     ptr_idxs += olengths_idxs[i];
3525     ptr_vals += olengths_vals[i];
3526     if (nis) {
3527       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3528       ptr_idxs_is += olengths_idxs_is[i];
3529     }
3530   }
3531   for (i=0;i<n_sends;i++) {
3532     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3533     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3534     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3535     if (nis) {
3536       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3537     }
3538   }
3539   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3540   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3541 
3542   /* assemble new l2g map */
3543   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3544   ptr_idxs = recv_buffer_idxs;
3545   new_local_rows = 0;
3546   for (i=0;i<n_recvs;i++) {
3547     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3548     ptr_idxs += olengths_idxs[i];
3549   }
3550   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3551   ptr_idxs = recv_buffer_idxs;
3552   new_local_rows = 0;
3553   for (i=0;i<n_recvs;i++) {
3554     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3555     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3556     ptr_idxs += olengths_idxs[i];
3557   }
3558   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3559   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3560   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3561 
3562   /* infer new local matrix type from received local matrices type */
3563   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3564   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3565   if (n_recvs) {
3566     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3567     ptr_idxs = recv_buffer_idxs;
3568     for (i=0;i<n_recvs;i++) {
3569       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3570         new_local_type_private = MATAIJ_PRIVATE;
3571         break;
3572       }
3573       ptr_idxs += olengths_idxs[i];
3574     }
3575     switch (new_local_type_private) {
3576       case MATDENSE_PRIVATE:
3577         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3578           new_local_type = MATSEQAIJ;
3579           bs = 1;
3580         } else { /* if I receive only 1 dense matrix */
3581           new_local_type = MATSEQDENSE;
3582           bs = 1;
3583         }
3584         break;
3585       case MATAIJ_PRIVATE:
3586         new_local_type = MATSEQAIJ;
3587         bs = 1;
3588         break;
3589       case MATBAIJ_PRIVATE:
3590         new_local_type = MATSEQBAIJ;
3591         break;
3592       case MATSBAIJ_PRIVATE:
3593         new_local_type = MATSEQSBAIJ;
3594         break;
3595       default:
3596         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3597         break;
3598     }
3599   } else { /* by default, new_local_type is seqdense */
3600     new_local_type = MATSEQDENSE;
3601     bs = 1;
3602   }
3603 
3604   /* create MATIS object if needed */
3605   if (reuse == MAT_INITIAL_MATRIX) {
3606     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3607     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3608   } else {
3609     /* it also destroys the local matrices */
3610     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3611   }
3612   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3613   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3614 
3615   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3616 
3617   /* Global to local map of received indices */
3618   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3619   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3620   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3621 
3622   /* restore attributes -> type of incoming data and its size */
3623   buf_size_idxs = 0;
3624   for (i=0;i<n_recvs;i++) {
3625     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3626     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3627     buf_size_idxs += (PetscInt)olengths_idxs[i];
3628   }
3629   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3630 
3631   /* set preallocation */
3632   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3633   if (!newisdense) {
3634     PetscInt *new_local_nnz=0;
3635 
3636     ptr_vals = recv_buffer_vals;
3637     ptr_idxs = recv_buffer_idxs_local;
3638     if (n_recvs) {
3639       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3640     }
3641     for (i=0;i<n_recvs;i++) {
3642       PetscInt j;
3643       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3644         for (j=0;j<*(ptr_idxs+1);j++) {
3645           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3646         }
3647       } else {
3648         /* TODO */
3649       }
3650       ptr_idxs += olengths_idxs[i];
3651     }
3652     if (new_local_nnz) {
3653       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3654       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3655       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3656       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3657       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3658       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3659     } else {
3660       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3661     }
3662     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3663   } else {
3664     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3665   }
3666 
3667   /* set values */
3668   ptr_vals = recv_buffer_vals;
3669   ptr_idxs = recv_buffer_idxs_local;
3670   for (i=0;i<n_recvs;i++) {
3671     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3672       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3673       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3674       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3675       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3676       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3677     } else {
3678       /* TODO */
3679     }
3680     ptr_idxs += olengths_idxs[i];
3681     ptr_vals += olengths_vals[i];
3682   }
3683   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3686   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3687   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3688   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3689 
3690 #if 0
3691   if (!restrict_comm) { /* check */
3692     Vec       lvec,rvec;
3693     PetscReal infty_error;
3694 
3695     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3696     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3697     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3698     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3699     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3700     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3701     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3702     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3703     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3704   }
3705 #endif
3706 
3707   /* assemble new additional is (if any) */
3708   if (nis) {
3709     PetscInt **temp_idxs,*count_is,j,psum;
3710 
3711     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3712     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3713     ptr_idxs = recv_buffer_idxs_is;
3714     psum = 0;
3715     for (i=0;i<n_recvs;i++) {
3716       for (j=0;j<nis;j++) {
3717         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3718         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3719         psum += plen;
3720         ptr_idxs += plen+1; /* shift pointer to received data */
3721       }
3722     }
3723     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3724     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3725     for (i=1;i<nis;i++) {
3726       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3727     }
3728     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3729     ptr_idxs = recv_buffer_idxs_is;
3730     for (i=0;i<n_recvs;i++) {
3731       for (j=0;j<nis;j++) {
3732         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3733         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3734         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3735         ptr_idxs += plen+1; /* shift pointer to received data */
3736       }
3737     }
3738     for (i=0;i<nis;i++) {
3739       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3740       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3741       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3742     }
3743     ierr = PetscFree(count_is);CHKERRQ(ierr);
3744     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3745     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3746   }
3747   /* free workspace */
3748   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3749   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3750   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3751   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3752   if (isdense) {
3753     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3754     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3755   } else {
3756     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3757   }
3758   if (nis) {
3759     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3760     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3761   }
3762   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3763   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3764   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3765   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3766   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3767   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3768   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3769   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3770   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3771   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3772   ierr = PetscFree(onodes);CHKERRQ(ierr);
3773   if (nis) {
3774     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3775     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3776     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3777   }
3778   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3779   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3780     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3781     for (i=0;i<nis;i++) {
3782       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3783     }
3784   }
3785   PetscFunctionReturn(0);
3786 }
3787 
3788 /* temporary hack into ksp private data structure */
3789 #include <petsc/private/kspimpl.h>
3790 
3791 #undef __FUNCT__
3792 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3793 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3794 {
3795   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3796   PC_IS                  *pcis = (PC_IS*)pc->data;
3797   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3798   MatNullSpace           CoarseNullSpace=NULL;
3799   ISLocalToGlobalMapping coarse_islg;
3800   IS                     coarse_is,*isarray;
3801   PetscInt               i,im_active=-1,active_procs=-1;
3802   PetscInt               nis,nisdofs,nisneu;
3803   PC                     pc_temp;
3804   PCType                 coarse_pc_type;
3805   KSPType                coarse_ksp_type;
3806   PetscBool              multilevel_requested,multilevel_allowed;
3807   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3808   Mat                    t_coarse_mat_is;
3809   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3810   PetscMPIInt            all_procs;
3811   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3812   PetscBool              compute_vecs = PETSC_FALSE;
3813   PetscScalar            *array;
3814   PetscErrorCode         ierr;
3815 
3816   PetscFunctionBegin;
3817   /* Assign global numbering to coarse dofs */
3818   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3819     PetscInt ocoarse_size;
3820     compute_vecs = PETSC_TRUE;
3821     ocoarse_size = pcbddc->coarse_size;
3822     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3823     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3824     /* see if we can avoid some work */
3825     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3826       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3827         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3828         coarse_reuse = PETSC_FALSE;
3829       } else { /* we can safely reuse already computed coarse matrix */
3830         coarse_reuse = PETSC_TRUE;
3831       }
3832     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3833       coarse_reuse = PETSC_FALSE;
3834     }
3835     /* reset any subassembling information */
3836     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3837     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3838   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3839     coarse_reuse = PETSC_TRUE;
3840   }
3841 
3842   /* count "active" (i.e. with positive local size) and "void" processes */
3843   im_active = !!(pcis->n);
3844   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3845   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3846   void_procs = all_procs-active_procs;
3847   csin_type_simple = PETSC_TRUE;
3848   redist = PETSC_FALSE;
3849   if (pcbddc->current_level && void_procs) {
3850     csin_ml = PETSC_TRUE;
3851     ncoarse_ml = void_procs;
3852     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3853     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3854       csin_ds = PETSC_TRUE;
3855       ncoarse_ds = pcbddc->redistribute_coarse;
3856       redist = PETSC_TRUE;
3857     } else {
3858       csin_ds = PETSC_TRUE;
3859       ncoarse_ds = active_procs;
3860       redist = PETSC_TRUE;
3861     }
3862   } else {
3863     csin_ml = PETSC_FALSE;
3864     ncoarse_ml = all_procs;
3865     if (void_procs) {
3866       csin_ds = PETSC_TRUE;
3867       ncoarse_ds = void_procs;
3868       csin_type_simple = PETSC_FALSE;
3869     } else {
3870       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3871         csin_ds = PETSC_TRUE;
3872         ncoarse_ds = pcbddc->redistribute_coarse;
3873         redist = PETSC_TRUE;
3874       } else {
3875         csin_ds = PETSC_FALSE;
3876         ncoarse_ds = all_procs;
3877       }
3878     }
3879   }
3880 
3881   /*
3882     test if we can go multilevel: three conditions must be satisfied:
3883     - we have not exceeded the number of levels requested
3884     - we can actually subassemble the active processes
3885     - we can find a suitable number of MPI processes where we can place the subassembled problem
3886   */
3887   multilevel_allowed = PETSC_FALSE;
3888   multilevel_requested = PETSC_FALSE;
3889   if (pcbddc->current_level < pcbddc->max_levels) {
3890     multilevel_requested = PETSC_TRUE;
3891     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3892       multilevel_allowed = PETSC_FALSE;
3893     } else {
3894       multilevel_allowed = PETSC_TRUE;
3895     }
3896   }
3897   /* determine number of process partecipating to coarse solver */
3898   if (multilevel_allowed) {
3899     ncoarse = ncoarse_ml;
3900     csin = csin_ml;
3901     redist = PETSC_FALSE;
3902   } else {
3903     ncoarse = ncoarse_ds;
3904     csin = csin_ds;
3905   }
3906 
3907   /* creates temporary l2gmap and IS for coarse indexes */
3908   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3909   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3910 
3911   /* creates temporary MATIS object for coarse matrix */
3912   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3913   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3914   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3915   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3916 #if 0
3917   {
3918     PetscViewer viewer;
3919     char filename[256];
3920     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3921     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3922     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3923     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3924     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3925   }
3926 #endif
3927   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3928   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3929   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3932 
3933   /* compute dofs splitting and neumann boundaries for coarse dofs */
3934   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3935     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3936     const PetscInt         *idxs;
3937     ISLocalToGlobalMapping tmap;
3938 
3939     /* create map between primal indices (in local representative ordering) and local primal numbering */
3940     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3941     /* allocate space for temporary storage */
3942     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3943     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3944     /* allocate for IS array */
3945     nisdofs = pcbddc->n_ISForDofsLocal;
3946     nisneu = !!pcbddc->NeumannBoundariesLocal;
3947     nis = nisdofs + nisneu;
3948     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3949     /* dofs splitting */
3950     for (i=0;i<nisdofs;i++) {
3951       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3952       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3953       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3954       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3955       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3956       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3957       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3958       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3959     }
3960     /* neumann boundaries */
3961     if (pcbddc->NeumannBoundariesLocal) {
3962       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3963       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3964       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3965       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3966       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3967       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3968       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3969       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3970     }
3971     /* free memory */
3972     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3973     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3974     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3975   } else {
3976     nis = 0;
3977     nisdofs = 0;
3978     nisneu = 0;
3979     isarray = NULL;
3980   }
3981   /* destroy no longer needed map */
3982   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3983 
3984   /* restrict on coarse candidates (if needed) */
3985   coarse_mat_is = NULL;
3986   if (csin) {
3987     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3988       if (redist) {
3989         PetscMPIInt rank;
3990         PetscInt    spc,n_spc_p1,dest[1],destsize;
3991 
3992         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3993         spc = active_procs/ncoarse;
3994         n_spc_p1 = active_procs%ncoarse;
3995         if (im_active) {
3996           destsize = 1;
3997           if (rank > n_spc_p1*(spc+1)-1) {
3998             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3999           } else {
4000             dest[0] = rank/(spc+1);
4001           }
4002         } else {
4003           destsize = 0;
4004         }
4005         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4006       } else if (csin_type_simple) {
4007         PetscMPIInt rank;
4008         PetscInt    issize,isidx;
4009 
4010         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4011         if (im_active) {
4012           issize = 1;
4013           isidx = (PetscInt)rank;
4014         } else {
4015           issize = 0;
4016           isidx = -1;
4017         }
4018         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4019       } else { /* get a suitable subassembling pattern from MATIS code */
4020         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4021       }
4022 
4023       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4024       if (!redist || ncoarse <= void_procs) {
4025         PetscInt ncoarse_cand,tissize,*nisindices;
4026         PetscInt *coarse_candidates;
4027         const PetscInt* tisindices;
4028 
4029         /* get coarse candidates' ranks in pc communicator */
4030         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4031         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4032         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4033           if (!coarse_candidates[i]) {
4034             coarse_candidates[ncoarse_cand++]=i;
4035           }
4036         }
4037         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4038 
4039 
4040         if (pcbddc->dbg_flag) {
4041           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4042           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4043           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4044           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4045           for (i=0;i<ncoarse_cand;i++) {
4046             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4047           }
4048           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4049           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4050         }
4051         /* shift the pattern on coarse candidates */
4052         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4053         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4054         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4055         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4056         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4057         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4058         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4059       }
4060       if (pcbddc->dbg_flag) {
4061         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4062         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4063         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4064         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4065       }
4066     }
4067     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4068     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4069   } else {
4070     if (pcbddc->dbg_flag) {
4071       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4072       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4073       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4074     }
4075     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4076     coarse_mat_is = t_coarse_mat_is;
4077   }
4078 
4079   /* create local to global scatters for coarse problem */
4080   if (compute_vecs) {
4081     PetscInt lrows;
4082     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4083     if (coarse_mat_is) {
4084       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4085     } else {
4086       lrows = 0;
4087     }
4088     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4089     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4090     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4091     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4092     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4093   }
4094   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4095   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4096 
4097   /* set defaults for coarse KSP and PC */
4098   if (multilevel_allowed) {
4099     coarse_ksp_type = KSPRICHARDSON;
4100     coarse_pc_type = PCBDDC;
4101   } else {
4102     coarse_ksp_type = KSPPREONLY;
4103     coarse_pc_type = PCREDUNDANT;
4104   }
4105 
4106   /* print some info if requested */
4107   if (pcbddc->dbg_flag) {
4108     if (!multilevel_allowed) {
4109       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4110       if (multilevel_requested) {
4111         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4112       } else if (pcbddc->max_levels) {
4113         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4114       }
4115       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4116     }
4117   }
4118 
4119   /* create the coarse KSP object only once with defaults */
4120   if (coarse_mat_is) {
4121     MatReuse coarse_mat_reuse;
4122     PetscViewer dbg_viewer = NULL;
4123     if (pcbddc->dbg_flag) {
4124       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4125       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4126     }
4127     if (!pcbddc->coarse_ksp) {
4128       char prefix[256],str_level[16];
4129       size_t len;
4130       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4131       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4132       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4133       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4134       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4135       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4136       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4137       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4138       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4139       /* prefix */
4140       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4141       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4142       if (!pcbddc->current_level) {
4143         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4144         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4145       } else {
4146         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4147         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4148         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4149         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4150         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4151         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4152       }
4153       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4154       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4155       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4156       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4157       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4158       /* allow user customization */
4159       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4160     }
4161     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4162     if (nisdofs) {
4163       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4164       for (i=0;i<nisdofs;i++) {
4165         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4166       }
4167     }
4168     if (nisneu) {
4169       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4170       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4171     }
4172 
4173     /* get some info after set from options */
4174     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4175     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4176     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4177     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4178     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4179       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4180       isbddc = PETSC_FALSE;
4181     }
4182     if (isredundant) {
4183       KSP inner_ksp;
4184       PC inner_pc;
4185       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4186       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4187       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4188     }
4189 
4190     /* assemble coarse matrix */
4191     if (coarse_reuse) {
4192       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4193       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4194       coarse_mat_reuse = MAT_REUSE_MATRIX;
4195     } else {
4196       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4197     }
4198     if (isbddc || isnn) {
4199       if (pcbddc->coarsening_ratio > 1) {
4200         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4201           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4202           if (pcbddc->dbg_flag) {
4203             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4204             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4205             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4206             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4207           }
4208         }
4209         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4210       } else {
4211         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4212         coarse_mat = coarse_mat_is;
4213       }
4214     } else {
4215       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4216     }
4217     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4218 
4219     /* propagate symmetry info to coarse matrix */
4220     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4221     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4222 
4223     /* set operators */
4224     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4225     if (pcbddc->dbg_flag) {
4226       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4227     }
4228   } else { /* processes non partecipating to coarse solver (if any) */
4229     coarse_mat = 0;
4230   }
4231   ierr = PetscFree(isarray);CHKERRQ(ierr);
4232 #if 0
4233   {
4234     PetscViewer viewer;
4235     char filename[256];
4236     sprintf(filename,"coarse_mat.m");
4237     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4238     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4239     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4240     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4241   }
4242 #endif
4243 
4244   /* Compute coarse null space (special handling by BDDC only) */
4245   if (pcbddc->NullSpace) {
4246     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4247   }
4248 
4249   if (pcbddc->coarse_ksp) {
4250     Vec crhs,csol;
4251     PetscBool ispreonly;
4252     if (CoarseNullSpace) {
4253       if (isbddc) {
4254         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4255       } else {
4256         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4257       }
4258     }
4259     /* setup coarse ksp */
4260     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4261     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4262     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4263     /* hack */
4264     if (!csol) {
4265       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4266     }
4267     if (!crhs) {
4268       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4269     }
4270     /* Check coarse problem if in debug mode or if solving with an iterative method */
4271     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4272     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4273       KSP       check_ksp;
4274       KSPType   check_ksp_type;
4275       PC        check_pc;
4276       Vec       check_vec,coarse_vec;
4277       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4278       PetscInt  its;
4279       PetscBool compute_eigs;
4280       PetscReal *eigs_r,*eigs_c;
4281       PetscInt  neigs;
4282       const char *prefix;
4283 
4284       /* Create ksp object suitable for estimation of extreme eigenvalues */
4285       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4286       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4287       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4288       if (ispreonly) {
4289         check_ksp_type = KSPPREONLY;
4290         compute_eigs = PETSC_FALSE;
4291       } else {
4292         check_ksp_type = KSPGMRES;
4293         compute_eigs = PETSC_TRUE;
4294       }
4295       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4296       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4297       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4298       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4299       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4300       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4301       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4302       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4303       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4304       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4305       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4306       /* create random vec */
4307       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4308       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4309       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4310       if (CoarseNullSpace) {
4311         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4312       }
4313       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4314       /* solve coarse problem */
4315       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4316       if (CoarseNullSpace) {
4317         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4318       }
4319       /* set eigenvalue estimation if preonly has not been requested */
4320       if (compute_eigs) {
4321         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4322         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4323         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4324         lambda_max = eigs_r[neigs-1];
4325         lambda_min = eigs_r[0];
4326         if (pcbddc->use_coarse_estimates) {
4327           if (lambda_max>lambda_min) {
4328             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4329             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4330           }
4331         }
4332       }
4333 
4334       /* check coarse problem residual error */
4335       if (pcbddc->dbg_flag) {
4336         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4337         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4338         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4339         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4340         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4341         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4342         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4343         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4344         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4345         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4346         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4347         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4348         if (compute_eigs) {
4349           PetscReal lambda_max_s,lambda_min_s;
4350           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4351           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4352           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4353           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4354           for (i=0;i<neigs;i++) {
4355             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4356           }
4357         }
4358         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4359         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4360       }
4361       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4362       if (compute_eigs) {
4363         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4364         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4365       }
4366     }
4367   }
4368   /* print additional info */
4369   if (pcbddc->dbg_flag) {
4370     /* waits until all processes reaches this point */
4371     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4372     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4373     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4374   }
4375 
4376   /* free memory */
4377   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4378   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4384 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4385 {
4386   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4387   PC_IS*         pcis = (PC_IS*)pc->data;
4388   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4389   PetscInt       i,coarse_size=0;
4390   PetscInt       *local_primal_indices=NULL;
4391   PetscErrorCode ierr;
4392 
4393   PetscFunctionBegin;
4394   /* Compute global number of coarse dofs */
4395   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4396     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4397   }
4398   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4399 
4400   /* check numbering */
4401   if (pcbddc->dbg_flag) {
4402     PetscScalar coarsesum,*array;
4403     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4404 
4405     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4406     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4407     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4408     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4409     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4410     for (i=0;i<pcbddc->local_primal_size;i++) {
4411       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4412     }
4413     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4414     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4415     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4416     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4417     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4418     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4419     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4420     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4421     for (i=0;i<pcis->n;i++) {
4422       if (array[i] == 1.0) {
4423         set_error = PETSC_TRUE;
4424         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4425       }
4426     }
4427     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4428     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4429     for (i=0;i<pcis->n;i++) {
4430       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4431     }
4432     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4433     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4434     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4435     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4436     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4437     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4438     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4439       PetscInt *gidxs;
4440 
4441       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4442       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4443       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4444       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4445       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4446       for (i=0;i<pcbddc->local_primal_size;i++) {
4447         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4448       }
4449       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4450       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4451     }
4452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4453     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4454   }
4455   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4456   /* get back data */
4457   *coarse_size_n = coarse_size;
4458   *local_primal_indices_n = local_primal_indices;
4459   PetscFunctionReturn(0);
4460 }
4461 
4462 #undef __FUNCT__
4463 #define __FUNCT__ "PCBDDCGlobalToLocal"
4464 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4465 {
4466   IS             localis_t;
4467   PetscInt       i,lsize,*idxs,n;
4468   PetscScalar    *vals;
4469   PetscErrorCode ierr;
4470 
4471   PetscFunctionBegin;
4472   /* get indices in local ordering exploiting local to global map */
4473   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4474   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4475   for (i=0;i<lsize;i++) vals[i] = 1.0;
4476   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4477   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4478   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4479   if (idxs) { /* multilevel guard */
4480     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4481   }
4482   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4483   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4484   ierr = PetscFree(vals);CHKERRQ(ierr);
4485   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4486   /* now compute set in local ordering */
4487   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4488   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4489   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4490   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4491   for (i=0,lsize=0;i<n;i++) {
4492     if (PetscRealPart(vals[i]) > 0.5) {
4493       lsize++;
4494     }
4495   }
4496   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4497   for (i=0,lsize=0;i<n;i++) {
4498     if (PetscRealPart(vals[i]) > 0.5) {
4499       idxs[lsize++] = i;
4500     }
4501   }
4502   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4503   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4504   *localis = localis_t;
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4509 #undef __FUNCT__
4510 #define __FUNCT__ "PCBDDCMatMult_Private"
4511 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4512 {
4513   PCBDDCChange_ctx change_ctx;
4514   PetscErrorCode   ierr;
4515 
4516   PetscFunctionBegin;
4517   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4518   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4519   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4520   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4521   PetscFunctionReturn(0);
4522 }
4523 
4524 #undef __FUNCT__
4525 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4526 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4527 {
4528   PCBDDCChange_ctx change_ctx;
4529   PetscErrorCode   ierr;
4530 
4531   PetscFunctionBegin;
4532   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4533   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4534   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4535   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4536   PetscFunctionReturn(0);
4537 }
4538 
4539 #undef __FUNCT__
4540 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4541 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4542 {
4543   PC_IS               *pcis=(PC_IS*)pc->data;
4544   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4545   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4546   Mat                 S_j;
4547   PetscInt            *used_xadj,*used_adjncy;
4548   PetscBool           free_used_adj;
4549   PetscErrorCode      ierr;
4550 
4551   PetscFunctionBegin;
4552   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4553   free_used_adj = PETSC_FALSE;
4554   if (pcbddc->sub_schurs_layers == -1) {
4555     used_xadj = NULL;
4556     used_adjncy = NULL;
4557   } else {
4558     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4559       used_xadj = pcbddc->mat_graph->xadj;
4560       used_adjncy = pcbddc->mat_graph->adjncy;
4561     } else if (pcbddc->computed_rowadj) {
4562       used_xadj = pcbddc->mat_graph->xadj;
4563       used_adjncy = pcbddc->mat_graph->adjncy;
4564     } else {
4565       PetscBool      flg_row=PETSC_FALSE;
4566       const PetscInt *xadj,*adjncy;
4567       PetscInt       nvtxs;
4568 
4569       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4570       if (flg_row) {
4571         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4572         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4573         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4574         free_used_adj = PETSC_TRUE;
4575       } else {
4576         pcbddc->sub_schurs_layers = -1;
4577         used_xadj = NULL;
4578         used_adjncy = NULL;
4579       }
4580       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4581     }
4582   }
4583 
4584   /* setup sub_schurs data */
4585   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4586   if (!sub_schurs->use_mumps) {
4587     /* pcbddc->ksp_D up to date only if not using MUMPS */
4588     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4589     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4590   } else {
4591     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4592   }
4593   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4594 
4595   /* free adjacency */
4596   if (free_used_adj) {
4597     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4598   }
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 #undef __FUNCT__
4603 #define __FUNCT__ "PCBDDCInitSubSchurs"
4604 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4605 {
4606   PC_IS               *pcis=(PC_IS*)pc->data;
4607   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4608   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4609   PCBDDCGraph         graph;
4610   PetscErrorCode      ierr;
4611 
4612   PetscFunctionBegin;
4613   /* attach interface graph for determining subsets */
4614   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4615     IS verticesIS;
4616 
4617     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4618     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4619     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4620     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4621     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4622     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4623 /*
4624     if (pcbddc->dbg_flag) {
4625       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4626     }
4627 */
4628   } else {
4629     graph = pcbddc->mat_graph;
4630   }
4631 
4632   /* sub_schurs init */
4633   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4634 
4635   /* free graph struct */
4636   if (pcbddc->sub_schurs_rebuild) {
4637     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4638   }
4639   PetscFunctionReturn(0);
4640 }
4641