xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision bc8044897e7dd696d944c9da6932220d985c1e43)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   PetscInt          *aux_primal_numbering;
1764   /* iterators */
1765   PetscInt          i,j,k,total_counts,temp_start_ptr;
1766   /* BLAS integers */
1767   PetscBLASInt      lwork,lierr;
1768   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1769   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1770   /* reuse */
1771   PetscInt          olocal_primal_size;
1772   PetscInt          *oprimal_indices_local_idxs;
1773   /* change of basis */
1774   PetscBool         boolforchange,qr_needed;
1775   PetscBT           touched,change_basis,qr_needed_idx;
1776   /* auxiliary stuff */
1777   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1778   PetscInt          ncc;
1779   /* some quantities */
1780   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1781   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1782 
1783   PetscFunctionBegin;
1784   /* Destroy Mat objects computed previously */
1785   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1786   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1787   /* save info on constraints from previous setup (if any) */
1788   olocal_primal_size = pcbddc->local_primal_size;
1789   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1790   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1791   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1792 
1793   /* print some info */
1794   if (pcbddc->dbg_flag) {
1795     IS       vertices;
1796     PetscInt nv,nedges,nfaces;
1797     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1798     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1799     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1805     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1806   }
1807 
1808   if (!pcbddc->adaptive_selection) {
1809     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1810     MatNullSpace nearnullsp;
1811     const Vec    *nearnullvecs;
1812     Vec          *localnearnullsp;
1813     PetscScalar  *array;
1814     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1815     PetscBool    nnsp_has_cnst;
1816     /* LAPACK working arrays for SVD or POD */
1817     PetscBool    skip_lapack;
1818     PetscScalar  *work;
1819     PetscReal    *singular_vals;
1820 #if defined(PETSC_USE_COMPLEX)
1821     PetscReal    *rwork;
1822 #endif
1823 #if defined(PETSC_MISSING_LAPACK_GESVD)
1824     PetscScalar  *temp_basis,*correlation_mat;
1825 #else
1826     PetscBLASInt dummy_int=1;
1827     PetscScalar  dummy_scalar=1.;
1828 #endif
1829 
1830     /* Get index sets for faces, edges and vertices from graph */
1831     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1832     /* free unneeded index sets */
1833     if (!pcbddc->use_vertices) {
1834       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1835     }
1836     if (!pcbddc->use_edges) {
1837       for (i=0;i<n_ISForEdges;i++) {
1838         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1839       }
1840       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1841       n_ISForEdges = 0;
1842     }
1843     if (!pcbddc->use_faces) {
1844       for (i=0;i<n_ISForFaces;i++) {
1845         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1846       }
1847       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1848       n_ISForFaces = 0;
1849     }
1850     /* HACKS (the following two blocks of code) */
1851     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1852       pcbddc->use_change_of_basis = PETSC_TRUE;
1853       if (!ISForEdges) {
1854         pcbddc->use_change_on_faces = PETSC_TRUE;
1855       }
1856     }
1857     if (pcbddc->NullSpace) {
1858       /* use_change_of_basis should be consistent among processors */
1859       PetscBool tbool[2],gbool[2];
1860       tbool[0] = pcbddc->use_change_of_basis;
1861       tbool[1] = pcbddc->use_change_on_faces;
1862       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1863       pcbddc->use_change_of_basis = gbool[0];
1864       pcbddc->use_change_on_faces = gbool[1];
1865     }
1866 
1867     /* check if near null space is attached to global mat */
1868     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1869     if (nearnullsp) {
1870       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1871       /* remove any stored info */
1872       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1873       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1874       /* store information for BDDC solver reuse */
1875       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1876       pcbddc->onearnullspace = nearnullsp;
1877       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1878       for (i=0;i<nnsp_size;i++) {
1879         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1880       }
1881     } else { /* if near null space is not provided BDDC uses constants by default */
1882       nnsp_size = 0;
1883       nnsp_has_cnst = PETSC_TRUE;
1884     }
1885     /* get max number of constraints on a single cc */
1886     max_constraints = nnsp_size;
1887     if (nnsp_has_cnst) max_constraints++;
1888 
1889     /*
1890          Evaluate maximum storage size needed by the procedure
1891          - temp_indices will contain start index of each constraint stored as follows
1892          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1893          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1894          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1895                                                                                                                                                            */
1896     total_counts = n_ISForFaces+n_ISForEdges;
1897     total_counts *= max_constraints;
1898     n_vertices = 0;
1899     if (ISForVertices) {
1900       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1901     }
1902     total_counts += n_vertices;
1903     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1904     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1905     total_counts = 0;
1906     max_size_of_constraint = 0;
1907     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1908       if (i<n_ISForEdges) {
1909         used_IS = &ISForEdges[i];
1910       } else {
1911         used_IS = &ISForFaces[i-n_ISForEdges];
1912       }
1913       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1914       total_counts += j;
1915       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1916     }
1917     total_counts *= max_constraints;
1918     total_counts += n_vertices;
1919     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1920     /* get local part of global near null space vectors */
1921     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1922     for (k=0;k<nnsp_size;k++) {
1923       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1924       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1925       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1926     }
1927 
1928     /* whether or not to skip lapack calls */
1929     skip_lapack = PETSC_TRUE;
1930     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1931 
1932     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1933     if (!skip_lapack) {
1934       PetscScalar temp_work;
1935 
1936 #if defined(PETSC_MISSING_LAPACK_GESVD)
1937       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1938       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1939       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1940       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1941 #if defined(PETSC_USE_COMPLEX)
1942       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1943 #endif
1944       /* now we evaluate the optimal workspace using query with lwork=-1 */
1945       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1946       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1947       lwork = -1;
1948       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1949 #if !defined(PETSC_USE_COMPLEX)
1950       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1951 #else
1952       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1953 #endif
1954       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1955       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1956 #else /* on missing GESVD */
1957       /* SVD */
1958       PetscInt max_n,min_n;
1959       max_n = max_size_of_constraint;
1960       min_n = max_constraints;
1961       if (max_size_of_constraint < max_constraints) {
1962         min_n = max_size_of_constraint;
1963         max_n = max_constraints;
1964       }
1965       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1966 #if defined(PETSC_USE_COMPLEX)
1967       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1968 #endif
1969       /* now we evaluate the optimal workspace using query with lwork=-1 */
1970       lwork = -1;
1971       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1972       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1973       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1974       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1975 #if !defined(PETSC_USE_COMPLEX)
1976       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1977 #else
1978       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1979 #endif
1980       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1981       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1982 #endif /* on missing GESVD */
1983       /* Allocate optimal workspace */
1984       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1985       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1986     }
1987     /* Now we can loop on constraining sets */
1988     total_counts = 0;
1989     temp_indices[0] = 0;
1990     /* vertices */
1991     if (ISForVertices) {
1992       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1993       if (nnsp_has_cnst) { /* consider all vertices */
1994         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1995         for (i=0;i<n_vertices;i++) {
1996           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1997           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1998           total_counts++;
1999         }
2000       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2001         PetscBool used_vertex;
2002         for (i=0;i<n_vertices;i++) {
2003           used_vertex = PETSC_FALSE;
2004           k = 0;
2005           while (!used_vertex && k<nnsp_size) {
2006             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2007             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2008               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2009               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2010               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2011               total_counts++;
2012               used_vertex = PETSC_TRUE;
2013             }
2014             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2015             k++;
2016           }
2017         }
2018       }
2019       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2020       n_vertices = total_counts;
2021     }
2022 
2023     /* edges and faces */
2024     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2025       if (ncc<n_ISForEdges) {
2026         used_IS = &ISForEdges[ncc];
2027         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2028       } else {
2029         used_IS = &ISForFaces[ncc-n_ISForEdges];
2030         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2031       }
2032       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2033       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2034       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2035       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2036       /* change of basis should not be performed on local periodic nodes */
2037       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2038       if (nnsp_has_cnst) {
2039         PetscScalar quad_value;
2040         temp_constraints++;
2041         if (!pcbddc->use_nnsp_true) {
2042           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2043         } else {
2044           quad_value = 1.0;
2045         }
2046         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2047         for (j=0;j<size_of_constraint;j++) {
2048           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2049         }
2050         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2051         total_counts++;
2052       }
2053       for (k=0;k<nnsp_size;k++) {
2054         PetscReal real_value;
2055         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2056         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2057         for (j=0;j<size_of_constraint;j++) {
2058           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2059         }
2060         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2061         /* check if array is null on the connected component */
2062         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2063         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2064         if (real_value > 0.0) { /* keep indices and values */
2065           temp_constraints++;
2066           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2067           total_counts++;
2068         }
2069       }
2070       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2071       valid_constraints = temp_constraints;
2072       if (!pcbddc->use_nnsp_true && temp_constraints) {
2073         if (temp_constraints == 1) { /* just normalize the constraint */
2074           PetscScalar norm;
2075           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2076           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2077           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2078           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2079         } else { /* perform SVD */
2080           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2081 
2082 #if defined(PETSC_MISSING_LAPACK_GESVD)
2083           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2084              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2085              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2086                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2087                 from that computed using LAPACKgesvd
2088              -> This is due to a different computation of eigenvectors in LAPACKheev
2089              -> The quality of the POD-computed basis will be the same */
2090           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2091           /* Store upper triangular part of correlation matrix */
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2093           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094           for (j=0;j<temp_constraints;j++) {
2095             for (k=0;k<j+1;k++) {
2096               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2097             }
2098           }
2099           /* compute eigenvalues and eigenvectors of correlation matrix */
2100           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2101           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2102 #if !defined(PETSC_USE_COMPLEX)
2103           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2104 #else
2105           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2106 #endif
2107           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2108           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2109           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2110           j = 0;
2111           while (j < temp_constraints && singular_vals[j] < tol) j++;
2112           total_counts = total_counts-j;
2113           valid_constraints = temp_constraints-j;
2114           /* scale and copy POD basis into used quadrature memory */
2115           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2116           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2117           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2119           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2120           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2121           if (j<temp_constraints) {
2122             PetscInt ii;
2123             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2124             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2125             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2126             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2127             for (k=0;k<temp_constraints-j;k++) {
2128               for (ii=0;ii<size_of_constraint;ii++) {
2129                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2130               }
2131             }
2132           }
2133 #else  /* on missing GESVD */
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2136           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2137           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2138 #if !defined(PETSC_USE_COMPLEX)
2139           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2140 #else
2141           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2142 #endif
2143           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2144           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2145           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2146           k = temp_constraints;
2147           if (k > size_of_constraint) k = size_of_constraint;
2148           j = 0;
2149           while (j < k && singular_vals[k-j-1] < tol) j++;
2150           valid_constraints = k-j;
2151           total_counts = total_counts-temp_constraints+valid_constraints;
2152 #endif /* on missing GESVD */
2153         }
2154       }
2155       /* setting change_of_basis flag is safe now */
2156       if (boolforchange) {
2157         for (j=0;j<valid_constraints;j++) {
2158           PetscBTSet(change_basis,total_counts-j-1);
2159         }
2160       }
2161     }
2162     /* free workspace */
2163     if (!skip_lapack) {
2164       ierr = PetscFree(work);CHKERRQ(ierr);
2165 #if defined(PETSC_USE_COMPLEX)
2166       ierr = PetscFree(rwork);CHKERRQ(ierr);
2167 #endif
2168       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2169 #if defined(PETSC_MISSING_LAPACK_GESVD)
2170       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2171       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2172 #endif
2173     }
2174     for (k=0;k<nnsp_size;k++) {
2175       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2176     }
2177     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2178     /* free index sets of faces, edges and vertices */
2179     for (i=0;i<n_ISForFaces;i++) {
2180       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2181     }
2182     if (n_ISForFaces) {
2183       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2184     }
2185     for (i=0;i<n_ISForEdges;i++) {
2186       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2187     }
2188     if (n_ISForEdges) {
2189       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2190     }
2191     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2192   } else {
2193     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2194     PetscInt        cum = 0;
2195 
2196     total_counts = 0;
2197     n_vertices = 0;
2198     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2199       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2200     }
2201     max_constraints = 0;
2202     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2203       total_counts += pcbddc->adaptive_constraints_n[i];
2204       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2205     }
2206     temp_indices = pcbddc->adaptive_constraints_ptrs;
2207     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2208     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2209 
2210 #if 0
2211     printf("Found %d totals\n",total_counts);
2212     for (i=0;i<total_counts;i++) {
2213       printf("const %d, start %d",i,temp_indices[i]);
2214       printf(" end %d:\n",temp_indices[i+1]);
2215       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2216         printf("  idxs %d",temp_indices_to_constraint[j]);
2217         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2218       }
2219     }
2220     for (i=0;i<n_vertices;i++) {
2221       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2222     }
2223     for (i=0;i<sub_schurs->n_subs;i++) {
2224       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2225     }
2226 #endif
2227 
2228     max_size_of_constraint = 0;
2229     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2230     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2231     /* Change of basis */
2232     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2233     if (pcbddc->use_change_of_basis) {
2234       cum = n_vertices;
2235       for (i=0;i<sub_schurs->n_subs;i++) {
2236         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2237           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2238             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2239           }
2240         }
2241         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2242       }
2243     }
2244   }
2245 
2246   /* map temp_indices_to_constraint in boundary numbering */
2247   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2248   if (i != temp_indices[total_counts]) {
2249     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2250   }
2251 
2252   /* set quantities in pcbddc data structure and store previous primal size */
2253   /* n_vertices defines the number of subdomain corners in the primal space */
2254   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2255   pcbddc->local_primal_size = total_counts;
2256   pcbddc->n_vertices = n_vertices;
2257   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2258 
2259   /* Create constraint matrix */
2260   /* The constraint matrix is used to compute the l2g map of primal dofs */
2261   /* so we need to set it up properly either with or without change of basis */
2262   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2263   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2264   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2265   /* array to compute a local numbering of constraints : vertices first then constraints */
2266   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2267   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2268 
2269   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2270   total_primal_vertices=0;
2271   for (i=0;i<pcbddc->local_primal_size;i++) {
2272     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2273     if (size_of_constraint == 1) {
2274       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2275       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2276       total_primal_vertices++;
2277     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2278       /* find first (by global ordering) untouched node in the cc */
2279       k = 0;
2280       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2281       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2282       aux_primal_numbering[total_primal_vertices++]=temp_indices_to_constraint[temp_indices[i]+k];
2283     }
2284   }
2285   /* determine if a QR strategy is needed for change of basis */
2286   qr_needed = PETSC_FALSE;
2287   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2288   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2289     if (PetscBTLookup(change_basis,i)) {
2290       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2291         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2292         j = 0;
2293         for (k=0;k<size_of_constraint;k++) {
2294           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2295             j++;
2296           }
2297         }
2298         /* found more than one primal dof on the cc */
2299         if (j > 1) {
2300           PetscBTSet(qr_needed_idx,i);
2301           qr_needed = PETSC_TRUE;
2302         }
2303       } else {
2304         PetscBTSet(qr_needed_idx,i);
2305         qr_needed = PETSC_TRUE;
2306       }
2307     }
2308   }
2309   total_counts = total_primal_vertices;
2310   for (i=0;i<pcbddc->local_primal_size;i++) {
2311     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2312     if (size_of_constraint != 1 && !PetscBTLookup(change_basis,i)) {
2313       /* find first (by global ordering) untouched node in the cc */
2314       k = 0;
2315       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2316       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2317       aux_primal_numbering[total_counts++]=temp_indices_to_constraint[temp_indices[i]+k];
2318     }
2319   }
2320 
2321   /* permute indices in order to have a sorted set of vertices */
2322   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2323 
2324   /* get reference dofs for local primal dofs */
2325   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2326   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,pcbddc->local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2327   pcbddc->n_actual_vertices = total_primal_vertices;
2328 
2329   /* nonzero structure of constraint matrix */
2330   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2331   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2332   j=total_primal_vertices;
2333   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2334     if (!PetscBTLookup(change_basis,i)) {
2335       nnz[j]=temp_indices[i+1]-temp_indices[i];
2336       j++;
2337     }
2338   }
2339   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2340   ierr = PetscFree(nnz);CHKERRQ(ierr);
2341 
2342   /* set values in constraint matrix */
2343   for (i=0;i<total_primal_vertices;i++) {
2344     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2345   }
2346   total_counts = total_primal_vertices;
2347   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2348     if (!PetscBTLookup(change_basis,i)) {
2349       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2350       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2351       total_counts++;
2352     }
2353   }
2354   /* assembling */
2355   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 
2358   /*
2359   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2360   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2361   */
2362   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2363   if (pcbddc->use_change_of_basis) {
2364     /* dual and primal dofs on a single cc */
2365     PetscInt     dual_dofs,primal_dofs;
2366     /* working stuff for GEQRF */
2367     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2368     PetscBLASInt lqr_work;
2369     /* working stuff for UNGQR */
2370     PetscScalar  *gqr_work,lgqr_work_t;
2371     PetscBLASInt lgqr_work;
2372     /* working stuff for TRTRS */
2373     PetscScalar  *trs_rhs;
2374     PetscBLASInt Blas_NRHS;
2375     /* pointers for values insertion into change of basis matrix */
2376     PetscInt     *start_rows,*start_cols;
2377     PetscScalar  *start_vals;
2378     /* working stuff for values insertion */
2379     PetscBT      is_primal;
2380     /* matrix sizes */
2381     PetscInt     global_size,local_size;
2382     /* temporary change of basis */
2383     Mat          localChangeOfBasisMatrix;
2384     /* extra space for debugging */
2385     PetscScalar  *dbg_work;
2386 
2387     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2388     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2389     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2390     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2391     /* nonzeros for local mat */
2392     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2393     for (i=0;i<pcis->n;i++) nnz[i]=1;
2394     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2395       if (PetscBTLookup(change_basis,i)) {
2396         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2397         if (PetscBTLookup(qr_needed_idx,i)) {
2398           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2399         } else {
2400           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2401           /* get local primal index on the cc */
2402           j = 0;
2403           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2404           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2405         }
2406       }
2407     }
2408     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2409     ierr = PetscFree(nnz);CHKERRQ(ierr);
2410     /* Set initial identity in the matrix */
2411     for (i=0;i<pcis->n;i++) {
2412       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2413     }
2414 
2415     if (pcbddc->dbg_flag) {
2416       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2417       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2418     }
2419 
2420 
2421     /* Now we loop on the constraints which need a change of basis */
2422     /*
2423        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2424        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2425 
2426        Basic blocks of change of basis matrix T computed by
2427 
2428           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2429 
2430             | 1        0   ...        0         s_1/S |
2431             | 0        1   ...        0         s_2/S |
2432             |              ...                        |
2433             | 0        ...            1     s_{n-1}/S |
2434             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2435 
2436             with S = \sum_{i=1}^n s_i^2
2437             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2438                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2439 
2440           - QR decomposition of constraints otherwise
2441     */
2442     if (qr_needed) {
2443       /* space to store Q */
2444       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2445       /* first we issue queries for optimal work */
2446       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2447       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2448       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2449       lqr_work = -1;
2450       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2451       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2452       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2453       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2454       lgqr_work = -1;
2455       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2456       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2457       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2458       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2459       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2460       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2461       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2462       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2463       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2464       /* array to store scaling factors for reflectors */
2465       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2466       /* array to store rhs and solution of triangular solver */
2467       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2468       /* allocating workspace for check */
2469       if (pcbddc->dbg_flag) {
2470         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2471       }
2472     }
2473     /* array to store whether a node is primal or not */
2474     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2475     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2476     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2477     if (i != total_primal_vertices) {
2478       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2479     }
2480     for (i=0;i<total_primal_vertices;i++) {
2481       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2482     }
2483     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2484 
2485     /* loop on constraints and see whether or not they need a change of basis and compute it */
2486     /* -> using implicit ordering contained in temp_indices data */
2487     total_counts = pcbddc->n_vertices;
2488     while (total_counts<pcbddc->local_primal_size) {
2489       primal_dofs = 1;
2490       if (PetscBTLookup(change_basis,total_counts)) {
2491         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2492         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2493           primal_dofs++;
2494         }
2495         /* get constraint info */
2496         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2497         dual_dofs = size_of_constraint-primal_dofs;
2498 
2499         if (pcbddc->dbg_flag) {
2500           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2501         }
2502 
2503         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2504 
2505           /* copy quadrature constraints for change of basis check */
2506           if (pcbddc->dbg_flag) {
2507             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2508           }
2509           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2510           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2511 
2512           /* compute QR decomposition of constraints */
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2516           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2517           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2518           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2519           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2520 
2521           /* explictly compute R^-T */
2522           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2523           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2524           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2528           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2529           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2530           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2531           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2532 
2533           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2538           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2539           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2540           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2541           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2542 
2543           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2544              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2545              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2546           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2547           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2548           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2549           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2550           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2551           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2552           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2553           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2554           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2555           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2556 
2557           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2558           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2559           /* insert cols for primal dofs */
2560           for (j=0;j<primal_dofs;j++) {
2561             start_vals = &qr_basis[j*size_of_constraint];
2562             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+j];
2563             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2564           }
2565           /* insert cols for dual dofs */
2566           for (j=0,k=0;j<dual_dofs;k++) {
2567             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2568               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2569               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2570               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2571               j++;
2572             }
2573           }
2574 
2575           /* check change of basis */
2576           if (pcbddc->dbg_flag) {
2577             PetscInt   ii,jj;
2578             PetscBool valid_qr=PETSC_TRUE;
2579             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2580             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2581             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2582             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2583             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2584             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2585             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2586             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2587             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2588             for (jj=0;jj<size_of_constraint;jj++) {
2589               for (ii=0;ii<primal_dofs;ii++) {
2590                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2591                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2592               }
2593             }
2594             if (!valid_qr) {
2595               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2596               for (jj=0;jj<size_of_constraint;jj++) {
2597                 for (ii=0;ii<primal_dofs;ii++) {
2598                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2599                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2600                   }
2601                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2602                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2603                   }
2604                 }
2605               }
2606             } else {
2607               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2608             }
2609           }
2610         } else { /* simple transformation block */
2611           PetscInt    row,col;
2612           PetscScalar val,norm;
2613 
2614           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2615           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2616           for (j=0;j<size_of_constraint;j++) {
2617             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2618             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2619             if (!PetscBTLookup(is_primal,row_B)) {
2620               col = temp_indices_to_constraint[temp_indices[total_counts]];
2621               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2622               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2623             } else {
2624               for (k=0;k<size_of_constraint;k++) {
2625                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2626                 if (row != col) {
2627                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]];
2628                 } else {
2629                   val = temp_quadrature_constraint[temp_indices[total_counts]]/norm;
2630                 }
2631                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2632               }
2633             }
2634           }
2635           if (pcbddc->dbg_flag) {
2636             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2637           }
2638         }
2639       } else {
2640         if (pcbddc->dbg_flag) {
2641           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2642         }
2643       }
2644       /* increment constraint counter total_counts */
2645       total_counts += primal_dofs;
2646     }
2647 
2648     /* free workspace */
2649     if (qr_needed) {
2650       if (pcbddc->dbg_flag) {
2651         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2652       }
2653       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2654       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2655       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2656       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2657       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2658     }
2659     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2660     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2662 
2663     /* assembling of global change of variable */
2664     {
2665       Mat      tmat;
2666       PetscInt bs;
2667 
2668       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2669       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2670       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2671       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2672       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2673       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2674       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2675       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2676       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2677       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2678       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2679       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2680       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2681       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2682       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2683       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2684       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2685       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2686     }
2687     /* check */
2688     if (pcbddc->dbg_flag) {
2689       PetscReal error;
2690       Vec       x,x_change;
2691 
2692       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2693       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2694       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2695       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2696       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2699       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2700       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2701       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2702       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2703       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2704       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2705       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2706       ierr = VecDestroy(&x);CHKERRQ(ierr);
2707       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2708     }
2709 
2710     /* adapt sub_schurs computed (if any) */
2711     if (pcbddc->use_deluxe_scaling) {
2712       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2713       if (sub_schurs->S_Ej_all) {
2714         Mat S_new,tmat;
2715         IS is_all_N;
2716 
2717         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2718         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2719         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2720         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2721         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2722         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2723         sub_schurs->S_Ej_all = S_new;
2724         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2725         if (sub_schurs->sum_S_Ej_all) {
2726           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2727           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2728           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2729           sub_schurs->sum_S_Ej_all = S_new;
2730           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2731         }
2732         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2733       }
2734     }
2735     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2736   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2737     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2738     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2739   }
2740 
2741   /* set up change of basis context */
2742   if (pcbddc->ChangeOfBasisMatrix) {
2743     PCBDDCChange_ctx change_ctx;
2744 
2745     if (!pcbddc->new_global_mat) {
2746       PetscInt global_size,local_size;
2747 
2748       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2749       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2750       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2751       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2752       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2753       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2754       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2755       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2756       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2757     } else {
2758       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2759       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2760       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2761     }
2762     if (!pcbddc->user_ChangeOfBasisMatrix) {
2763       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2764       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2765     } else {
2766       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2767       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2768     }
2769     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2770     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2771     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   }
2774 
2775   /* get indices in local ordering for vertices and constraints */
2776   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2777 
2778   /* check if a new primal space has been introduced */
2779   pcbddc->new_primal_space_local = PETSC_TRUE;
2780   if (olocal_primal_size == pcbddc->local_primal_size) {
2781     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2782     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2783   }
2784   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2785   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2786   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2787 
2788   /* flush dbg viewer */
2789   if (pcbddc->dbg_flag) {
2790     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2791   }
2792 
2793   /* free workspace */
2794   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2795   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2796   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2797   if (!pcbddc->adaptive_selection) {
2798     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2799     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2800   } else {
2801     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2802                       pcbddc->adaptive_constraints_ptrs,
2803                       pcbddc->adaptive_constraints_idxs,
2804                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2805     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2806   }
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2812 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2813 {
2814   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2815   PC_IS       *pcis = (PC_IS*)pc->data;
2816   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2817   PetscInt    ierr,i,vertex_size,N;
2818   PetscViewer viewer=pcbddc->dbg_viewer;
2819 
2820   PetscFunctionBegin;
2821   /* Reset previously computed graph */
2822   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2823   /* Init local Graph struct */
2824   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2825   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2826 
2827   /* Check validity of the csr graph passed in by the user */
2828   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2829     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2830   }
2831 
2832   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2833   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2834     PetscInt  *xadj,*adjncy;
2835     PetscInt  nvtxs;
2836     PetscBool flg_row=PETSC_FALSE;
2837 
2838     if (pcbddc->use_local_adj) {
2839 
2840       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2841       if (flg_row) {
2842         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2843         pcbddc->computed_rowadj = PETSC_TRUE;
2844       }
2845       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2846     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2847       IS                     is_dummy;
2848       ISLocalToGlobalMapping l2gmap_dummy;
2849       PetscInt               j,sum;
2850       PetscInt               *cxadj,*cadjncy;
2851       const PetscInt         *idxs;
2852       PCBDDCGraph            graph;
2853       PetscBT                is_on_boundary;
2854 
2855       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2856       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2858       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2859       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2860       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2861       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2862       if (flg_row) {
2863         graph->xadj = xadj;
2864         graph->adjncy = adjncy;
2865       }
2866       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2867       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2868       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2869 
2870       if (pcbddc->dbg_flag) {
2871         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2872         for (i=0;i<graph->ncc;i++) {
2873           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2874         }
2875       }
2876 
2877       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2878       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2879       for (i=0;i<pcis->n_B;i++) {
2880         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2881       }
2882       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2883 
2884       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2885       sum = 0;
2886       for (i=0;i<graph->ncc;i++) {
2887         PetscInt sizecc = 0;
2888         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2889           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2890             sizecc++;
2891           }
2892         }
2893         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2894           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2895             cxadj[graph->queue[j]] = sizecc;
2896           }
2897         }
2898         sum += sizecc*sizecc;
2899       }
2900       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2901       sum = 0;
2902       for (i=0;i<pcis->n;i++) {
2903         PetscInt temp = cxadj[i];
2904         cxadj[i] = sum;
2905         sum += temp;
2906       }
2907       cxadj[pcis->n] = sum;
2908       for (i=0;i<graph->ncc;i++) {
2909         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2910           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2911             PetscInt k,sizecc = 0;
2912             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2913               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2914                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2915                 sizecc++;
2916               }
2917             }
2918           }
2919         }
2920       }
2921       if (sum) {
2922         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2923       } else {
2924         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2925         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2926       }
2927       graph->xadj = 0;
2928       graph->adjncy = 0;
2929       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2930       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2931     }
2932   }
2933   if (pcbddc->dbg_flag) {
2934     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2935   }
2936 
2937   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2938   vertex_size = 1;
2939   if (pcbddc->user_provided_isfordofs) {
2940     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2941       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2942       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2943         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2944         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2945       }
2946       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2947       pcbddc->n_ISForDofs = 0;
2948       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2949     }
2950     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2951     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2952   } else {
2953     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2954       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2955       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2956       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2957         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2958       }
2959     }
2960   }
2961 
2962   /* Setup of Graph */
2963   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2964     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2965   }
2966   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2967     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2968   }
2969   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
2970 
2971   /* Graph's connected components analysis */
2972   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2973 
2974   /* print some info to stdout */
2975   if (pcbddc->dbg_flag) {
2976     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
2977   }
2978 
2979   /* mark topography has done */
2980   pcbddc->recompute_topography = PETSC_FALSE;
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "PCBDDCSubsetNumbering"
2986 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2987 {
2988   Vec            local_vec,global_vec;
2989   IS             seqis,paris;
2990   VecScatter     scatter_ctx;
2991   PetscScalar    *array;
2992   PetscInt       *temp_global_dofs;
2993   PetscScalar    globalsum;
2994   PetscInt       i,j,s;
2995   PetscInt       nlocals,first_index,old_index,max_local,max_global;
2996   PetscMPIInt    rank_prec_comm,size_prec_comm;
2997   PetscInt       *dof_sizes,*dof_displs;
2998   PetscBool      first_found;
2999   PetscErrorCode ierr;
3000 
3001   PetscFunctionBegin;
3002   /* mpi buffers */
3003   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3004   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3005   j = ( !rank_prec_comm ? size_prec_comm : 0);
3006   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3007   /* get maximum size of subset */
3008   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3009   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3010   max_local = 0;
3011   for (i=0;i<n_local_dofs;i++) {
3012     if (max_local < temp_global_dofs[i] ) {
3013       max_local = temp_global_dofs[i];
3014     }
3015   }
3016   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3017   max_global++;
3018   max_local = 0;
3019   for (i=0;i<n_local_dofs;i++) {
3020     if (max_local < local_dofs[i] ) {
3021       max_local = local_dofs[i];
3022     }
3023   }
3024   max_local++;
3025   /* allocate workspace */
3026   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3027   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3028   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3029   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3030   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3031   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3032   /* create scatter */
3033   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3034   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3035   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3036   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3037   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3038   /* init array */
3039   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3040   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3041   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3042   if (local_dofs_mult) {
3043     for (i=0;i<n_local_dofs;i++) {
3044       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3045     }
3046   } else {
3047     for (i=0;i<n_local_dofs;i++) {
3048       array[local_dofs[i]]=1.0;
3049     }
3050   }
3051   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3052   /* scatter into global vec and get total number of global dofs */
3053   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3054   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3055   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3056   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3057   /* Fill global_vec with cumulative function for global numbering */
3058   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3059   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3060   nlocals = 0;
3061   first_index = -1;
3062   first_found = PETSC_FALSE;
3063   for (i=0;i<s;i++) {
3064     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3065       first_found = PETSC_TRUE;
3066       first_index = i;
3067     }
3068     nlocals += (PetscInt)PetscRealPart(array[i]);
3069   }
3070   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3071   if (!rank_prec_comm) {
3072     dof_displs[0]=0;
3073     for (i=1;i<size_prec_comm;i++) {
3074       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3075     }
3076   }
3077   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3078   if (first_found) {
3079     array[first_index] += (PetscScalar)nlocals;
3080     old_index = first_index;
3081     for (i=first_index+1;i<s;i++) {
3082       if (PetscRealPart(array[i]) > 0.1) {
3083         array[i] += array[old_index];
3084         old_index = i;
3085       }
3086     }
3087   }
3088   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3089   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3090   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3091   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3092   /* get global ordering of local dofs */
3093   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3094   if (local_dofs_mult) {
3095     for (i=0;i<n_local_dofs;i++) {
3096       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3097     }
3098   } else {
3099     for (i=0;i<n_local_dofs;i++) {
3100       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3101     }
3102   }
3103   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3104   /* free workspace */
3105   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3106   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3107   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3108   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3109   /* return pointer to global ordering of local dofs */
3110   *global_numbering_subset = temp_global_dofs;
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 #undef __FUNCT__
3115 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3116 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3117 {
3118   PetscInt       i,j;
3119   PetscScalar    *alphas;
3120   PetscErrorCode ierr;
3121 
3122   PetscFunctionBegin;
3123   /* this implements stabilized Gram-Schmidt */
3124   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3125   for (i=0;i<n;i++) {
3126     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3127     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3128     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3129   }
3130   ierr = PetscFree(alphas);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatISGetSubassemblingPattern"
3136 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3137 {
3138   Mat             subdomain_adj;
3139   IS              new_ranks,ranks_send_to;
3140   MatPartitioning partitioner;
3141   Mat_IS          *matis;
3142   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3143   PetscInt        prank;
3144   PetscMPIInt     size,rank,color;
3145   PetscInt        *xadj,*adjncy,*oldranks;
3146   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3147   PetscInt        i,local_size,threshold=0;
3148   PetscErrorCode  ierr;
3149   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3150   PetscSubcomm    subcomm;
3151 
3152   PetscFunctionBegin;
3153   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3154   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3155   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3156 
3157   /* Get info on mapping */
3158   matis = (Mat_IS*)(mat->data);
3159   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3160   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3161 
3162   /* build local CSR graph of subdomains' connectivity */
3163   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3164   xadj[0] = 0;
3165   xadj[1] = PetscMax(n_neighs-1,0);
3166   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3167   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3168 
3169   if (threshold) {
3170     PetscInt xadj_count = 0;
3171     for (i=1;i<n_neighs;i++) {
3172       if (n_shared[i] > threshold) {
3173         adjncy[xadj_count] = neighs[i];
3174         adjncy_wgt[xadj_count] = n_shared[i];
3175         xadj_count++;
3176       }
3177     }
3178     xadj[1] = xadj_count;
3179   } else {
3180     if (xadj[1]) {
3181       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3182       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3183     }
3184   }
3185   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3186   if (use_square) {
3187     for (i=0;i<xadj[1];i++) {
3188       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3189     }
3190   }
3191   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3192 
3193   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3194 
3195   /*
3196     Restrict work on active processes only.
3197   */
3198   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3199   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3200   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3201   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3202   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3203   if (color) {
3204     ierr = PetscFree(xadj);CHKERRQ(ierr);
3205     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3206     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3207   } else {
3208     PetscInt coarsening_ratio;
3209     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3210     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3211     prank = rank;
3212     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3213     /*
3214     for (i=0;i<size;i++) {
3215       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3216     }
3217     */
3218     for (i=0;i<xadj[1];i++) {
3219       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3220     }
3221     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3222     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3223     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3224 
3225     /* Partition */
3226     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3227     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3228     if (use_vwgt) {
3229       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3230       v_wgt[0] = local_size;
3231       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3232     }
3233     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3234     coarsening_ratio = size/n_subdomains;
3235     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3236     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3237     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3238     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3239 
3240     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3241     if (contiguous) {
3242       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3243     } else {
3244       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3245     }
3246     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3247     /* clean up */
3248     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3249     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3250     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3251     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3252   }
3253   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3254 
3255   /* assemble parallel IS for sends */
3256   i = 1;
3257   if (color) i=0;
3258   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3259 
3260   /* get back IS */
3261   *is_sends = ranks_send_to;
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatISSubassemble"
3269 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3270 {
3271   Mat                    local_mat;
3272   Mat_IS                 *matis;
3273   IS                     is_sends_internal;
3274   PetscInt               rows,cols,new_local_rows;
3275   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3276   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3277   ISLocalToGlobalMapping l2gmap;
3278   PetscInt*              l2gmap_indices;
3279   const PetscInt*        is_indices;
3280   MatType                new_local_type;
3281   /* buffers */
3282   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3283   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3284   PetscInt               *recv_buffer_idxs_local;
3285   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3286   /* MPI */
3287   MPI_Comm               comm,comm_n;
3288   PetscSubcomm           subcomm;
3289   PetscMPIInt            n_sends,n_recvs,commsize;
3290   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3291   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3292   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3293   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3294   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3295   PetscErrorCode         ierr;
3296 
3297   PetscFunctionBegin;
3298   /* TODO: add missing checks */
3299   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3300   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3301   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3302   PetscValidLogicalCollectiveInt(mat,nis,7);
3303   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3304   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3305   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3306   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3307   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3308   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3309   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3310   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3311     PetscInt mrows,mcols,mnrows,mncols;
3312     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3313     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3314     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3315     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3316     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3317     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3318   }
3319   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3320   PetscValidLogicalCollectiveInt(mat,bs,0);
3321   /* prepare IS for sending if not provided */
3322   if (!is_sends) {
3323     PetscBool pcontig = PETSC_TRUE;
3324     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3325     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3326   } else {
3327     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3328     is_sends_internal = is_sends;
3329   }
3330 
3331   /* get pointer of MATIS data */
3332   matis = (Mat_IS*)mat->data;
3333 
3334   /* get comm */
3335   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3336 
3337   /* compute number of sends */
3338   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3339   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3340 
3341   /* compute number of receives */
3342   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3343   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3344   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3345   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3346   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3347   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3348   ierr = PetscFree(iflags);CHKERRQ(ierr);
3349 
3350   /* restrict comm if requested */
3351   subcomm = 0;
3352   destroy_mat = PETSC_FALSE;
3353   if (restrict_comm) {
3354     PetscMPIInt color,subcommsize;
3355 
3356     color = 0;
3357     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3358     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3359     subcommsize = commsize - subcommsize;
3360     /* check if reuse has been requested */
3361     if (reuse == MAT_REUSE_MATRIX) {
3362       if (*mat_n) {
3363         PetscMPIInt subcommsize2;
3364         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3365         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3366         comm_n = PetscObjectComm((PetscObject)*mat_n);
3367       } else {
3368         comm_n = PETSC_COMM_SELF;
3369       }
3370     } else { /* MAT_INITIAL_MATRIX */
3371       PetscMPIInt rank;
3372 
3373       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3374       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3375       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3376       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3377       comm_n = PetscSubcommChild(subcomm);
3378     }
3379     /* flag to destroy *mat_n if not significative */
3380     if (color) destroy_mat = PETSC_TRUE;
3381   } else {
3382     comm_n = comm;
3383   }
3384 
3385   /* prepare send/receive buffers */
3386   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3387   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3388   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3389   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3390   if (nis) {
3391     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3392   }
3393 
3394   /* Get data from local matrices */
3395   if (!isdense) {
3396     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3397     /* TODO: See below some guidelines on how to prepare the local buffers */
3398     /*
3399        send_buffer_vals should contain the raw values of the local matrix
3400        send_buffer_idxs should contain:
3401        - MatType_PRIVATE type
3402        - PetscInt        size_of_l2gmap
3403        - PetscInt        global_row_indices[size_of_l2gmap]
3404        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3405     */
3406   } else {
3407     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3408     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3409     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3410     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3411     send_buffer_idxs[1] = i;
3412     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3413     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3414     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3415     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3416     for (i=0;i<n_sends;i++) {
3417       ilengths_vals[is_indices[i]] = len*len;
3418       ilengths_idxs[is_indices[i]] = len+2;
3419     }
3420   }
3421   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3422   /* additional is (if any) */
3423   if (nis) {
3424     PetscMPIInt psum;
3425     PetscInt j;
3426     for (j=0,psum=0;j<nis;j++) {
3427       PetscInt plen;
3428       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3429       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3430       psum += len+1; /* indices + lenght */
3431     }
3432     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3433     for (j=0,psum=0;j<nis;j++) {
3434       PetscInt plen;
3435       const PetscInt *is_array_idxs;
3436       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3437       send_buffer_idxs_is[psum] = plen;
3438       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3439       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3440       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3441       psum += plen+1; /* indices + lenght */
3442     }
3443     for (i=0;i<n_sends;i++) {
3444       ilengths_idxs_is[is_indices[i]] = psum;
3445     }
3446     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3447   }
3448 
3449   buf_size_idxs = 0;
3450   buf_size_vals = 0;
3451   buf_size_idxs_is = 0;
3452   for (i=0;i<n_recvs;i++) {
3453     buf_size_idxs += (PetscInt)olengths_idxs[i];
3454     buf_size_vals += (PetscInt)olengths_vals[i];
3455     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3456   }
3457   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3458   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3459   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3460 
3461   /* get new tags for clean communications */
3462   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3463   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3464   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3465 
3466   /* allocate for requests */
3467   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3468   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3469   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3470   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3471   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3472   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3473 
3474   /* communications */
3475   ptr_idxs = recv_buffer_idxs;
3476   ptr_vals = recv_buffer_vals;
3477   ptr_idxs_is = recv_buffer_idxs_is;
3478   for (i=0;i<n_recvs;i++) {
3479     source_dest = onodes[i];
3480     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3481     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3482     ptr_idxs += olengths_idxs[i];
3483     ptr_vals += olengths_vals[i];
3484     if (nis) {
3485       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3486       ptr_idxs_is += olengths_idxs_is[i];
3487     }
3488   }
3489   for (i=0;i<n_sends;i++) {
3490     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3491     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3492     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3493     if (nis) {
3494       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3495     }
3496   }
3497   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3498   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3499 
3500   /* assemble new l2g map */
3501   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3502   ptr_idxs = recv_buffer_idxs;
3503   new_local_rows = 0;
3504   for (i=0;i<n_recvs;i++) {
3505     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3506     ptr_idxs += olengths_idxs[i];
3507   }
3508   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3509   ptr_idxs = recv_buffer_idxs;
3510   new_local_rows = 0;
3511   for (i=0;i<n_recvs;i++) {
3512     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3513     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3514     ptr_idxs += olengths_idxs[i];
3515   }
3516   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3517   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3518   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3519 
3520   /* infer new local matrix type from received local matrices type */
3521   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3522   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3523   if (n_recvs) {
3524     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3525     ptr_idxs = recv_buffer_idxs;
3526     for (i=0;i<n_recvs;i++) {
3527       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3528         new_local_type_private = MATAIJ_PRIVATE;
3529         break;
3530       }
3531       ptr_idxs += olengths_idxs[i];
3532     }
3533     switch (new_local_type_private) {
3534       case MATDENSE_PRIVATE:
3535         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3536           new_local_type = MATSEQAIJ;
3537           bs = 1;
3538         } else { /* if I receive only 1 dense matrix */
3539           new_local_type = MATSEQDENSE;
3540           bs = 1;
3541         }
3542         break;
3543       case MATAIJ_PRIVATE:
3544         new_local_type = MATSEQAIJ;
3545         bs = 1;
3546         break;
3547       case MATBAIJ_PRIVATE:
3548         new_local_type = MATSEQBAIJ;
3549         break;
3550       case MATSBAIJ_PRIVATE:
3551         new_local_type = MATSEQSBAIJ;
3552         break;
3553       default:
3554         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3555         break;
3556     }
3557   } else { /* by default, new_local_type is seqdense */
3558     new_local_type = MATSEQDENSE;
3559     bs = 1;
3560   }
3561 
3562   /* create MATIS object if needed */
3563   if (reuse == MAT_INITIAL_MATRIX) {
3564     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3565     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3566   } else {
3567     /* it also destroys the local matrices */
3568     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3569   }
3570   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3571   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3572 
3573   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3574 
3575   /* Global to local map of received indices */
3576   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3577   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3578   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3579 
3580   /* restore attributes -> type of incoming data and its size */
3581   buf_size_idxs = 0;
3582   for (i=0;i<n_recvs;i++) {
3583     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3584     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3585     buf_size_idxs += (PetscInt)olengths_idxs[i];
3586   }
3587   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3588 
3589   /* set preallocation */
3590   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3591   if (!newisdense) {
3592     PetscInt *new_local_nnz=0;
3593 
3594     ptr_vals = recv_buffer_vals;
3595     ptr_idxs = recv_buffer_idxs_local;
3596     if (n_recvs) {
3597       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3598     }
3599     for (i=0;i<n_recvs;i++) {
3600       PetscInt j;
3601       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3602         for (j=0;j<*(ptr_idxs+1);j++) {
3603           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3604         }
3605       } else {
3606         /* TODO */
3607       }
3608       ptr_idxs += olengths_idxs[i];
3609     }
3610     if (new_local_nnz) {
3611       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3612       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3613       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3614       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3615       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3616       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3617     } else {
3618       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3619     }
3620     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3621   } else {
3622     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3623   }
3624 
3625   /* set values */
3626   ptr_vals = recv_buffer_vals;
3627   ptr_idxs = recv_buffer_idxs_local;
3628   for (i=0;i<n_recvs;i++) {
3629     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3630       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3631       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3632       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3633       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3634       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3635     } else {
3636       /* TODO */
3637     }
3638     ptr_idxs += olengths_idxs[i];
3639     ptr_vals += olengths_vals[i];
3640   }
3641   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3643   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3644   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3645   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3646   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3647 
3648 #if 0
3649   if (!restrict_comm) { /* check */
3650     Vec       lvec,rvec;
3651     PetscReal infty_error;
3652 
3653     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3654     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3655     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3656     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3657     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3658     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3659     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3660     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3661     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3662   }
3663 #endif
3664 
3665   /* assemble new additional is (if any) */
3666   if (nis) {
3667     PetscInt **temp_idxs,*count_is,j,psum;
3668 
3669     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3670     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3671     ptr_idxs = recv_buffer_idxs_is;
3672     psum = 0;
3673     for (i=0;i<n_recvs;i++) {
3674       for (j=0;j<nis;j++) {
3675         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3676         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3677         psum += plen;
3678         ptr_idxs += plen+1; /* shift pointer to received data */
3679       }
3680     }
3681     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3682     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3683     for (i=1;i<nis;i++) {
3684       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3685     }
3686     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3687     ptr_idxs = recv_buffer_idxs_is;
3688     for (i=0;i<n_recvs;i++) {
3689       for (j=0;j<nis;j++) {
3690         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3691         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3692         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3693         ptr_idxs += plen+1; /* shift pointer to received data */
3694       }
3695     }
3696     for (i=0;i<nis;i++) {
3697       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3698       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3699       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3700     }
3701     ierr = PetscFree(count_is);CHKERRQ(ierr);
3702     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3703     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3704   }
3705   /* free workspace */
3706   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3707   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3708   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3709   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3710   if (isdense) {
3711     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3712     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3713   } else {
3714     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3715   }
3716   if (nis) {
3717     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3718     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3719   }
3720   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3721   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3722   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3723   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3724   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3725   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3726   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3727   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3728   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3729   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3730   ierr = PetscFree(onodes);CHKERRQ(ierr);
3731   if (nis) {
3732     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3733     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3734     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3735   }
3736   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3737   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3738     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3739     for (i=0;i<nis;i++) {
3740       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3741     }
3742   }
3743   PetscFunctionReturn(0);
3744 }
3745 
3746 /* temporary hack into ksp private data structure */
3747 #include <petsc/private/kspimpl.h>
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3751 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3752 {
3753   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3754   PC_IS                  *pcis = (PC_IS*)pc->data;
3755   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3756   MatNullSpace           CoarseNullSpace=NULL;
3757   ISLocalToGlobalMapping coarse_islg;
3758   IS                     coarse_is,*isarray;
3759   PetscInt               i,im_active=-1,active_procs=-1;
3760   PetscInt               nis,nisdofs,nisneu;
3761   PC                     pc_temp;
3762   PCType                 coarse_pc_type;
3763   KSPType                coarse_ksp_type;
3764   PetscBool              multilevel_requested,multilevel_allowed;
3765   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3766   Mat                    t_coarse_mat_is;
3767   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3768   PetscMPIInt            all_procs;
3769   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3770   PetscBool              compute_vecs = PETSC_FALSE;
3771   PetscScalar            *array;
3772   PetscErrorCode         ierr;
3773 
3774   PetscFunctionBegin;
3775   /* Assign global numbering to coarse dofs */
3776   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3777     PetscInt ocoarse_size;
3778     compute_vecs = PETSC_TRUE;
3779     ocoarse_size = pcbddc->coarse_size;
3780     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3781     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3782     /* see if we can avoid some work */
3783     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3784       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3785         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3786         coarse_reuse = PETSC_FALSE;
3787       } else { /* we can safely reuse already computed coarse matrix */
3788         coarse_reuse = PETSC_TRUE;
3789       }
3790     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3791       coarse_reuse = PETSC_FALSE;
3792     }
3793     /* reset any subassembling information */
3794     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3795     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3796   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3797     coarse_reuse = PETSC_TRUE;
3798   }
3799 
3800   /* count "active" (i.e. with positive local size) and "void" processes */
3801   im_active = !!(pcis->n);
3802   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3803   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3804   void_procs = all_procs-active_procs;
3805   csin_type_simple = PETSC_TRUE;
3806   redist = PETSC_FALSE;
3807   if (pcbddc->current_level && void_procs) {
3808     csin_ml = PETSC_TRUE;
3809     ncoarse_ml = void_procs;
3810     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3811     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3812       csin_ds = PETSC_TRUE;
3813       ncoarse_ds = pcbddc->redistribute_coarse;
3814       redist = PETSC_TRUE;
3815     } else {
3816       csin_ds = PETSC_TRUE;
3817       ncoarse_ds = active_procs;
3818       redist = PETSC_TRUE;
3819     }
3820   } else {
3821     csin_ml = PETSC_FALSE;
3822     ncoarse_ml = all_procs;
3823     if (void_procs) {
3824       csin_ds = PETSC_TRUE;
3825       ncoarse_ds = void_procs;
3826       csin_type_simple = PETSC_FALSE;
3827     } else {
3828       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3829         csin_ds = PETSC_TRUE;
3830         ncoarse_ds = pcbddc->redistribute_coarse;
3831         redist = PETSC_TRUE;
3832       } else {
3833         csin_ds = PETSC_FALSE;
3834         ncoarse_ds = all_procs;
3835       }
3836     }
3837   }
3838 
3839   /*
3840     test if we can go multilevel: three conditions must be satisfied:
3841     - we have not exceeded the number of levels requested
3842     - we can actually subassemble the active processes
3843     - we can find a suitable number of MPI processes where we can place the subassembled problem
3844   */
3845   multilevel_allowed = PETSC_FALSE;
3846   multilevel_requested = PETSC_FALSE;
3847   if (pcbddc->current_level < pcbddc->max_levels) {
3848     multilevel_requested = PETSC_TRUE;
3849     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3850       multilevel_allowed = PETSC_FALSE;
3851     } else {
3852       multilevel_allowed = PETSC_TRUE;
3853     }
3854   }
3855   /* determine number of process partecipating to coarse solver */
3856   if (multilevel_allowed) {
3857     ncoarse = ncoarse_ml;
3858     csin = csin_ml;
3859     redist = PETSC_FALSE;
3860   } else {
3861     ncoarse = ncoarse_ds;
3862     csin = csin_ds;
3863   }
3864 
3865   /* creates temporary l2gmap and IS for coarse indexes */
3866   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3867   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3868 
3869   /* creates temporary MATIS object for coarse matrix */
3870   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3871   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3872   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3873   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3874 #if 0
3875   {
3876     PetscViewer viewer;
3877     char filename[256];
3878     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3879     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3880     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3881     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3882     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3883   }
3884 #endif
3885   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3886   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3887   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3888   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3890 
3891   /* compute dofs splitting and neumann boundaries for coarse dofs */
3892   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3893     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3894     const PetscInt         *idxs;
3895     ISLocalToGlobalMapping tmap;
3896 
3897     /* create map between primal indices (in local representative ordering) and local primal numbering */
3898     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3899     /* allocate space for temporary storage */
3900     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3901     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3902     /* allocate for IS array */
3903     nisdofs = pcbddc->n_ISForDofsLocal;
3904     nisneu = !!pcbddc->NeumannBoundariesLocal;
3905     nis = nisdofs + nisneu;
3906     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3907     /* dofs splitting */
3908     for (i=0;i<nisdofs;i++) {
3909       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3910       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3911       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3912       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3913       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3914       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3915       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3916       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3917     }
3918     /* neumann boundaries */
3919     if (pcbddc->NeumannBoundariesLocal) {
3920       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3921       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3922       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3923       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3924       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3925       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3926       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3927       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3928     }
3929     /* free memory */
3930     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3931     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3932     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3933   } else {
3934     nis = 0;
3935     nisdofs = 0;
3936     nisneu = 0;
3937     isarray = NULL;
3938   }
3939   /* destroy no longer needed map */
3940   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3941 
3942   /* restrict on coarse candidates (if needed) */
3943   coarse_mat_is = NULL;
3944   if (csin) {
3945     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3946       if (redist) {
3947         PetscMPIInt rank;
3948         PetscInt    spc,n_spc_p1,dest[1],destsize;
3949 
3950         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3951         spc = active_procs/ncoarse;
3952         n_spc_p1 = active_procs%ncoarse;
3953         if (im_active) {
3954           destsize = 1;
3955           if (rank > n_spc_p1*(spc+1)-1) {
3956             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3957           } else {
3958             dest[0] = rank/(spc+1);
3959           }
3960         } else {
3961           destsize = 0;
3962         }
3963         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3964       } else if (csin_type_simple) {
3965         PetscMPIInt rank;
3966         PetscInt    issize,isidx;
3967 
3968         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3969         if (im_active) {
3970           issize = 1;
3971           isidx = (PetscInt)rank;
3972         } else {
3973           issize = 0;
3974           isidx = -1;
3975         }
3976         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3977       } else { /* get a suitable subassembling pattern from MATIS code */
3978         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3979       }
3980 
3981       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
3982       if (!redist || ncoarse <= void_procs) {
3983         PetscInt ncoarse_cand,tissize,*nisindices;
3984         PetscInt *coarse_candidates;
3985         const PetscInt* tisindices;
3986 
3987         /* get coarse candidates' ranks in pc communicator */
3988         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3989         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3990         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
3991           if (!coarse_candidates[i]) {
3992             coarse_candidates[ncoarse_cand++]=i;
3993           }
3994         }
3995         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
3996 
3997 
3998         if (pcbddc->dbg_flag) {
3999           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4000           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4001           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4002           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4003           for (i=0;i<ncoarse_cand;i++) {
4004             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4005           }
4006           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4007           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4008         }
4009         /* shift the pattern on coarse candidates */
4010         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4011         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4012         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4013         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4014         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4015         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4016         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4017       }
4018       if (pcbddc->dbg_flag) {
4019         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4020         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4021         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4022         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4023       }
4024     }
4025     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4026     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4027   } else {
4028     if (pcbddc->dbg_flag) {
4029       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4030       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4031       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4032     }
4033     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4034     coarse_mat_is = t_coarse_mat_is;
4035   }
4036 
4037   /* create local to global scatters for coarse problem */
4038   if (compute_vecs) {
4039     PetscInt lrows;
4040     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4041     if (coarse_mat_is) {
4042       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4043     } else {
4044       lrows = 0;
4045     }
4046     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4047     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4048     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4049     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4050     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4051   }
4052   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4053   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4054 
4055   /* set defaults for coarse KSP and PC */
4056   if (multilevel_allowed) {
4057     coarse_ksp_type = KSPRICHARDSON;
4058     coarse_pc_type = PCBDDC;
4059   } else {
4060     coarse_ksp_type = KSPPREONLY;
4061     coarse_pc_type = PCREDUNDANT;
4062   }
4063 
4064   /* print some info if requested */
4065   if (pcbddc->dbg_flag) {
4066     if (!multilevel_allowed) {
4067       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4068       if (multilevel_requested) {
4069         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4070       } else if (pcbddc->max_levels) {
4071         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4072       }
4073       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4074     }
4075   }
4076 
4077   /* create the coarse KSP object only once with defaults */
4078   if (coarse_mat_is) {
4079     MatReuse coarse_mat_reuse;
4080     PetscViewer dbg_viewer = NULL;
4081     if (pcbddc->dbg_flag) {
4082       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4083       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4084     }
4085     if (!pcbddc->coarse_ksp) {
4086       char prefix[256],str_level[16];
4087       size_t len;
4088       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4089       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4090       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4091       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4092       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4093       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4094       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4095       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4096       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4097       /* prefix */
4098       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4099       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4100       if (!pcbddc->current_level) {
4101         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4102         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4103       } else {
4104         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4105         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4106         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4107         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4108         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4109         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4110       }
4111       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4112       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4113       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4114       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4115       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4116       /* allow user customization */
4117       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4118     }
4119     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4120     if (nisdofs) {
4121       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4122       for (i=0;i<nisdofs;i++) {
4123         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4124       }
4125     }
4126     if (nisneu) {
4127       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4128       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4129     }
4130 
4131     /* get some info after set from options */
4132     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4133     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4134     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4135     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4136     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4137       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4138       isbddc = PETSC_FALSE;
4139     }
4140     if (isredundant) {
4141       KSP inner_ksp;
4142       PC inner_pc;
4143       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4144       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4145       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4146     }
4147 
4148     /* assemble coarse matrix */
4149     if (coarse_reuse) {
4150       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4151       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4152       coarse_mat_reuse = MAT_REUSE_MATRIX;
4153     } else {
4154       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4155     }
4156     if (isbddc || isnn) {
4157       if (pcbddc->coarsening_ratio > 1) {
4158         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4159           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4160           if (pcbddc->dbg_flag) {
4161             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4162             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4163             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4164             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4165           }
4166         }
4167         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4168       } else {
4169         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4170         coarse_mat = coarse_mat_is;
4171       }
4172     } else {
4173       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4174     }
4175     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4176 
4177     /* propagate symmetry info to coarse matrix */
4178     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4179     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4180 
4181     /* set operators */
4182     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4183     if (pcbddc->dbg_flag) {
4184       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4185     }
4186   } else { /* processes non partecipating to coarse solver (if any) */
4187     coarse_mat = 0;
4188   }
4189   ierr = PetscFree(isarray);CHKERRQ(ierr);
4190 #if 0
4191   {
4192     PetscViewer viewer;
4193     char filename[256];
4194     sprintf(filename,"coarse_mat.m");
4195     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4196     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4197     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4198     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4199   }
4200 #endif
4201 
4202   /* Compute coarse null space (special handling by BDDC only) */
4203   if (pcbddc->NullSpace) {
4204     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4205   }
4206 
4207   if (pcbddc->coarse_ksp) {
4208     Vec crhs,csol;
4209     PetscBool ispreonly;
4210     if (CoarseNullSpace) {
4211       if (isbddc) {
4212         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4213       } else {
4214         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4215       }
4216     }
4217     /* setup coarse ksp */
4218     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4219     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4220     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4221     /* hack */
4222     if (!csol) {
4223       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4224     }
4225     if (!crhs) {
4226       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4227     }
4228     /* Check coarse problem if in debug mode or if solving with an iterative method */
4229     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4230     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4231       KSP       check_ksp;
4232       KSPType   check_ksp_type;
4233       PC        check_pc;
4234       Vec       check_vec,coarse_vec;
4235       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4236       PetscInt  its;
4237       PetscBool compute_eigs;
4238       PetscReal *eigs_r,*eigs_c;
4239       PetscInt  neigs;
4240       const char *prefix;
4241 
4242       /* Create ksp object suitable for estimation of extreme eigenvalues */
4243       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4244       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4245       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4246       if (ispreonly) {
4247         check_ksp_type = KSPPREONLY;
4248         compute_eigs = PETSC_FALSE;
4249       } else {
4250         check_ksp_type = KSPGMRES;
4251         compute_eigs = PETSC_TRUE;
4252       }
4253       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4254       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4255       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4256       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4257       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4258       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4259       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4260       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4261       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4262       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4263       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4264       /* create random vec */
4265       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4266       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4267       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4268       if (CoarseNullSpace) {
4269         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4270       }
4271       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4272       /* solve coarse problem */
4273       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4274       if (CoarseNullSpace) {
4275         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4276       }
4277       /* set eigenvalue estimation if preonly has not been requested */
4278       if (compute_eigs) {
4279         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4280         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4281         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4282         lambda_max = eigs_r[neigs-1];
4283         lambda_min = eigs_r[0];
4284         if (pcbddc->use_coarse_estimates) {
4285           if (lambda_max>lambda_min) {
4286             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4287             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4288           }
4289         }
4290       }
4291 
4292       /* check coarse problem residual error */
4293       if (pcbddc->dbg_flag) {
4294         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4295         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4296         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4297         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4298         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4299         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4300         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4301         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4302         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4303         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4304         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4305         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4306         if (compute_eigs) {
4307           PetscReal lambda_max_s,lambda_min_s;
4308           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4309           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4310           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4311           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4312           for (i=0;i<neigs;i++) {
4313             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4314           }
4315         }
4316         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4317         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4318       }
4319       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4320       if (compute_eigs) {
4321         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4322         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4323       }
4324     }
4325   }
4326   /* print additional info */
4327   if (pcbddc->dbg_flag) {
4328     /* waits until all processes reaches this point */
4329     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4330     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4331     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4332   }
4333 
4334   /* free memory */
4335   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4336   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4337   PetscFunctionReturn(0);
4338 }
4339 
4340 #undef __FUNCT__
4341 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4342 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4343 {
4344   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4345   PC_IS*         pcis = (PC_IS*)pc->data;
4346   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4347   PetscInt       i,coarse_size=0;
4348   PetscInt       *local_primal_indices=NULL;
4349   PetscErrorCode ierr;
4350 
4351   PetscFunctionBegin;
4352   /* Compute global number of coarse dofs */
4353   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4354     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4355   }
4356   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4357 
4358   /* check numbering */
4359   if (pcbddc->dbg_flag) {
4360     PetscScalar coarsesum,*array;
4361     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4362 
4363     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4364     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4365     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4366     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4367     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4368     for (i=0;i<pcbddc->local_primal_size;i++) {
4369       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4370     }
4371     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4372     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4373     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4374     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4375     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4376     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4377     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4378     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4379     for (i=0;i<pcis->n;i++) {
4380       if (array[i] == 1.0) {
4381         set_error = PETSC_TRUE;
4382         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4383       }
4384     }
4385     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4386     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4387     for (i=0;i<pcis->n;i++) {
4388       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4389     }
4390     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4391     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4392     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4393     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4394     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4395     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4396     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4397       PetscInt *gidxs;
4398 
4399       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4400       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4401       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4402       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4403       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4404       for (i=0;i<pcbddc->local_primal_size;i++) {
4405         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4406       }
4407       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4408       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4409     }
4410     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4411     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4412   }
4413   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4414   /* get back data */
4415   *coarse_size_n = coarse_size;
4416   *local_primal_indices_n = local_primal_indices;
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "PCBDDCGlobalToLocal"
4422 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4423 {
4424   IS             localis_t;
4425   PetscInt       i,lsize,*idxs,n;
4426   PetscScalar    *vals;
4427   PetscErrorCode ierr;
4428 
4429   PetscFunctionBegin;
4430   /* get indices in local ordering exploiting local to global map */
4431   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4432   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4433   for (i=0;i<lsize;i++) vals[i] = 1.0;
4434   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4435   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4436   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4437   if (idxs) { /* multilevel guard */
4438     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4439   }
4440   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4441   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4442   ierr = PetscFree(vals);CHKERRQ(ierr);
4443   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4444   /* now compute set in local ordering */
4445   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4446   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4447   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4448   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4449   for (i=0,lsize=0;i<n;i++) {
4450     if (PetscRealPart(vals[i]) > 0.5) {
4451       lsize++;
4452     }
4453   }
4454   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4455   for (i=0,lsize=0;i<n;i++) {
4456     if (PetscRealPart(vals[i]) > 0.5) {
4457       idxs[lsize++] = i;
4458     }
4459   }
4460   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4461   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4462   *localis = localis_t;
4463   PetscFunctionReturn(0);
4464 }
4465 
4466 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4467 #undef __FUNCT__
4468 #define __FUNCT__ "PCBDDCMatMult_Private"
4469 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4470 {
4471   PCBDDCChange_ctx change_ctx;
4472   PetscErrorCode   ierr;
4473 
4474   PetscFunctionBegin;
4475   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4476   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4477   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4478   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4479   PetscFunctionReturn(0);
4480 }
4481 
4482 #undef __FUNCT__
4483 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4484 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4485 {
4486   PCBDDCChange_ctx change_ctx;
4487   PetscErrorCode   ierr;
4488 
4489   PetscFunctionBegin;
4490   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4491   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4492   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4493   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #undef __FUNCT__
4498 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4499 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4500 {
4501   PC_IS               *pcis=(PC_IS*)pc->data;
4502   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4503   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4504   Mat                 S_j;
4505   PetscInt            *used_xadj,*used_adjncy;
4506   PetscBool           free_used_adj;
4507   PetscErrorCode      ierr;
4508 
4509   PetscFunctionBegin;
4510   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4511   free_used_adj = PETSC_FALSE;
4512   if (pcbddc->sub_schurs_layers == -1) {
4513     used_xadj = NULL;
4514     used_adjncy = NULL;
4515   } else {
4516     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4517       used_xadj = pcbddc->mat_graph->xadj;
4518       used_adjncy = pcbddc->mat_graph->adjncy;
4519     } else if (pcbddc->computed_rowadj) {
4520       used_xadj = pcbddc->mat_graph->xadj;
4521       used_adjncy = pcbddc->mat_graph->adjncy;
4522     } else {
4523       PetscBool      flg_row=PETSC_FALSE;
4524       const PetscInt *xadj,*adjncy;
4525       PetscInt       nvtxs;
4526 
4527       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4528       if (flg_row) {
4529         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4530         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4531         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4532         free_used_adj = PETSC_TRUE;
4533       } else {
4534         pcbddc->sub_schurs_layers = -1;
4535         used_xadj = NULL;
4536         used_adjncy = NULL;
4537       }
4538       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4539     }
4540   }
4541 
4542   /* setup sub_schurs data */
4543   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4544   if (!sub_schurs->use_mumps) {
4545     /* pcbddc->ksp_D up to date only if not using MUMPS */
4546     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4547     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4548   } else {
4549     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4550   }
4551   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4552 
4553   /* free adjacency */
4554   if (free_used_adj) {
4555     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4556   }
4557   PetscFunctionReturn(0);
4558 }
4559 
4560 #undef __FUNCT__
4561 #define __FUNCT__ "PCBDDCInitSubSchurs"
4562 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4563 {
4564   PC_IS               *pcis=(PC_IS*)pc->data;
4565   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4566   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4567   PCBDDCGraph         graph;
4568   PetscErrorCode      ierr;
4569 
4570   PetscFunctionBegin;
4571   /* attach interface graph for determining subsets */
4572   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4573     IS verticesIS;
4574 
4575     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4576     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4577     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4578     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4579     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4580     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4581 /*
4582     if (pcbddc->dbg_flag) {
4583       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4584     }
4585 */
4586   } else {
4587     graph = pcbddc->mat_graph;
4588   }
4589 
4590   /* sub_schurs init */
4591   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4592 
4593   /* free graph struct */
4594   if (pcbddc->sub_schurs_rebuild) {
4595     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4596   }
4597   PetscFunctionReturn(0);
4598 }
4599