xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 70022509d1c65f3f7cbed7e48cb9cb7fb9aa7ea1)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscBool         boolforchange,qr_needed;
1774   PetscBT           touched,change_basis,qr_needed_idx;
1775   /* auxiliary stuff */
1776   PetscInt          *nnz,*is_indices;
1777   PetscInt          ncc;
1778   /* some quantities */
1779   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1780   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1781 
1782   PetscFunctionBegin;
1783   /* Destroy Mat objects computed previously */
1784   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1785   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1786   /* save info on constraints from previous setup (if any) */
1787   olocal_primal_size = pcbddc->local_primal_size;
1788   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1789   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1791 
1792   /* print some info */
1793   if (pcbddc->dbg_flag) {
1794     IS       vertices;
1795     PetscInt nv,nedges,nfaces;
1796     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1797     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1798     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1799     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1805   }
1806 
1807   if (!pcbddc->adaptive_selection) {
1808     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1809     MatNullSpace nearnullsp;
1810     const Vec    *nearnullvecs;
1811     Vec          *localnearnullsp;
1812     PetscScalar  *array;
1813     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1814     PetscBool    nnsp_has_cnst;
1815     /* LAPACK working arrays for SVD or POD */
1816     PetscBool    skip_lapack;
1817     PetscScalar  *work;
1818     PetscReal    *singular_vals;
1819 #if defined(PETSC_USE_COMPLEX)
1820     PetscReal    *rwork;
1821 #endif
1822 #if defined(PETSC_MISSING_LAPACK_GESVD)
1823     PetscScalar  *temp_basis,*correlation_mat;
1824 #else
1825     PetscBLASInt dummy_int=1;
1826     PetscScalar  dummy_scalar=1.;
1827 #endif
1828 
1829     /* Get index sets for faces, edges and vertices from graph */
1830     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1831     /* free unneeded index sets */
1832     if (!pcbddc->use_vertices) {
1833       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1834     }
1835     if (!pcbddc->use_edges) {
1836       for (i=0;i<n_ISForEdges;i++) {
1837         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1838       }
1839       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1840       n_ISForEdges = 0;
1841     }
1842     if (!pcbddc->use_faces) {
1843       for (i=0;i<n_ISForFaces;i++) {
1844         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1845       }
1846       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1847       n_ISForFaces = 0;
1848     }
1849 
1850 #if defined(PETSC_USE_DEBUG)
1851     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
1852        Also use_change_of_basis should be consistent among processors */
1853     if (pcbddc->NullSpace) {
1854       PetscBool tbool[2],gbool[2];
1855 
1856       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
1857         pcbddc->use_change_of_basis = PETSC_TRUE;
1858         if (!ISForEdges) {
1859           pcbddc->use_change_on_faces = PETSC_TRUE;
1860         }
1861       }
1862       tbool[0] = pcbddc->use_change_of_basis;
1863       tbool[1] = pcbddc->use_change_on_faces;
1864       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1865       pcbddc->use_change_of_basis = gbool[0];
1866       pcbddc->use_change_on_faces = gbool[1];
1867     }
1868 #endif
1869 
1870     /* check if near null space is attached to global mat */
1871     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1872     if (nearnullsp) {
1873       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1874       /* remove any stored info */
1875       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1876       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1877       /* store information for BDDC solver reuse */
1878       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1879       pcbddc->onearnullspace = nearnullsp;
1880       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1881       for (i=0;i<nnsp_size;i++) {
1882         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1883       }
1884     } else { /* if near null space is not provided BDDC uses constants by default */
1885       nnsp_size = 0;
1886       nnsp_has_cnst = PETSC_TRUE;
1887     }
1888     /* get max number of constraints on a single cc */
1889     max_constraints = nnsp_size;
1890     if (nnsp_has_cnst) max_constraints++;
1891 
1892     /*
1893          Evaluate maximum storage size needed by the procedure
1894          - temp_indices will contain start index of each constraint stored as follows
1895          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1896          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1897          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1898                                                                                                                                                            */
1899     total_counts = n_ISForFaces+n_ISForEdges;
1900     total_counts *= max_constraints;
1901     n_vertices = 0;
1902     if (ISForVertices) {
1903       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1904     }
1905     total_counts += n_vertices;
1906     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1907     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1908     total_counts = 0;
1909     max_size_of_constraint = 0;
1910     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1911       if (i<n_ISForEdges) {
1912         used_IS = &ISForEdges[i];
1913       } else {
1914         used_IS = &ISForFaces[i-n_ISForEdges];
1915       }
1916       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1917       total_counts += j;
1918       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1919     }
1920     total_counts *= max_constraints;
1921     total_counts += n_vertices;
1922     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1923     /* get local part of global near null space vectors */
1924     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1925     for (k=0;k<nnsp_size;k++) {
1926       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1927       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1928       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1929     }
1930 
1931     /* whether or not to skip lapack calls */
1932     skip_lapack = PETSC_TRUE;
1933     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1934 
1935     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1936     if (!skip_lapack) {
1937       PetscScalar temp_work;
1938 
1939 #if defined(PETSC_MISSING_LAPACK_GESVD)
1940       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1941       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1942       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1943       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1944 #if defined(PETSC_USE_COMPLEX)
1945       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1946 #endif
1947       /* now we evaluate the optimal workspace using query with lwork=-1 */
1948       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1949       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1950       lwork = -1;
1951       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1952 #if !defined(PETSC_USE_COMPLEX)
1953       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1954 #else
1955       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1956 #endif
1957       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1958       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1959 #else /* on missing GESVD */
1960       /* SVD */
1961       PetscInt max_n,min_n;
1962       max_n = max_size_of_constraint;
1963       min_n = max_constraints;
1964       if (max_size_of_constraint < max_constraints) {
1965         min_n = max_size_of_constraint;
1966         max_n = max_constraints;
1967       }
1968       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1969 #if defined(PETSC_USE_COMPLEX)
1970       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1971 #endif
1972       /* now we evaluate the optimal workspace using query with lwork=-1 */
1973       lwork = -1;
1974       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1975       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1976       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1977       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1978 #if !defined(PETSC_USE_COMPLEX)
1979       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1980 #else
1981       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1982 #endif
1983       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1984       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1985 #endif /* on missing GESVD */
1986       /* Allocate optimal workspace */
1987       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1988       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1989     }
1990     /* Now we can loop on constraining sets */
1991     total_counts = 0;
1992     temp_indices[0] = 0;
1993     /* vertices */
1994     if (ISForVertices) {
1995       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1996       if (nnsp_has_cnst) { /* consider all vertices */
1997         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1998         for (i=0;i<n_vertices;i++) {
1999           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2000           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2001           total_counts++;
2002         }
2003       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2004         PetscBool used_vertex;
2005         for (i=0;i<n_vertices;i++) {
2006           used_vertex = PETSC_FALSE;
2007           k = 0;
2008           while (!used_vertex && k<nnsp_size) {
2009             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2010             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2011               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2012               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2013               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2014               total_counts++;
2015               used_vertex = PETSC_TRUE;
2016             }
2017             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2018             k++;
2019           }
2020         }
2021       }
2022       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2023       n_vertices = total_counts;
2024     }
2025 
2026     /* edges and faces */
2027     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2028       if (ncc<n_ISForEdges) {
2029         used_IS = &ISForEdges[ncc];
2030         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2031       } else {
2032         used_IS = &ISForFaces[ncc-n_ISForEdges];
2033         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2034       }
2035       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2036       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2037       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2038       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2039       /* change of basis should not be performed on local periodic nodes */
2040       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2041       if (nnsp_has_cnst) {
2042         PetscScalar quad_value;
2043         temp_constraints++;
2044         if (!pcbddc->use_nnsp_true) {
2045           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2046         } else {
2047           quad_value = 1.0;
2048         }
2049         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2050         for (j=0;j<size_of_constraint;j++) {
2051           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2052         }
2053         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2054         total_counts++;
2055       }
2056       for (k=0;k<nnsp_size;k++) {
2057         PetscReal real_value;
2058         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2059         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2060         for (j=0;j<size_of_constraint;j++) {
2061           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2062         }
2063         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2064         /* check if array is null on the connected component */
2065         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2066         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2067         if (real_value > 0.0) { /* keep indices and values */
2068           temp_constraints++;
2069           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2070           total_counts++;
2071         }
2072       }
2073       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2074       valid_constraints = temp_constraints;
2075       if (!pcbddc->use_nnsp_true && temp_constraints) {
2076         if (temp_constraints == 1) { /* just normalize the constraint */
2077           PetscScalar norm;
2078           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2079           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2080           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2081           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2082         } else { /* perform SVD */
2083           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2084 
2085 #if defined(PETSC_MISSING_LAPACK_GESVD)
2086           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2087              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2088              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2089                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2090                 from that computed using LAPACKgesvd
2091              -> This is due to a different computation of eigenvectors in LAPACKheev
2092              -> The quality of the POD-computed basis will be the same */
2093           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2094           /* Store upper triangular part of correlation matrix */
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2096           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2097           for (j=0;j<temp_constraints;j++) {
2098             for (k=0;k<j+1;k++) {
2099               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2100             }
2101           }
2102           /* compute eigenvalues and eigenvectors of correlation matrix */
2103           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2104           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2105 #if !defined(PETSC_USE_COMPLEX)
2106           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2107 #else
2108           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2109 #endif
2110           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2111           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2112           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2113           j = 0;
2114           while (j < temp_constraints && singular_vals[j] < tol) j++;
2115           total_counts = total_counts-j;
2116           valid_constraints = temp_constraints-j;
2117           /* scale and copy POD basis into used quadrature memory */
2118           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2119           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2120           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2121           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2122           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2123           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2124           if (j<temp_constraints) {
2125             PetscInt ii;
2126             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2127             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2128             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2129             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130             for (k=0;k<temp_constraints-j;k++) {
2131               for (ii=0;ii<size_of_constraint;ii++) {
2132                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2133               }
2134             }
2135           }
2136 #else  /* on missing GESVD */
2137           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2138           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2139           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2140           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2141 #if !defined(PETSC_USE_COMPLEX)
2142           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2143 #else
2144           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2145 #endif
2146           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2147           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2148           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2149           k = temp_constraints;
2150           if (k > size_of_constraint) k = size_of_constraint;
2151           j = 0;
2152           while (j < k && singular_vals[k-j-1] < tol) j++;
2153           valid_constraints = k-j;
2154           total_counts = total_counts-temp_constraints+valid_constraints;
2155 #endif /* on missing GESVD */
2156         }
2157       }
2158       /* setting change_of_basis flag is safe now */
2159       if (boolforchange) {
2160         for (j=0;j<valid_constraints;j++) {
2161           PetscBTSet(change_basis,total_counts-j-1);
2162         }
2163       }
2164     }
2165     /* free workspace */
2166     if (!skip_lapack) {
2167       ierr = PetscFree(work);CHKERRQ(ierr);
2168 #if defined(PETSC_USE_COMPLEX)
2169       ierr = PetscFree(rwork);CHKERRQ(ierr);
2170 #endif
2171       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2172 #if defined(PETSC_MISSING_LAPACK_GESVD)
2173       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2174       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2175 #endif
2176     }
2177     for (k=0;k<nnsp_size;k++) {
2178       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2179     }
2180     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2181     /* free index sets of faces, edges and vertices */
2182     for (i=0;i<n_ISForFaces;i++) {
2183       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2184     }
2185     if (n_ISForFaces) {
2186       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2187     }
2188     for (i=0;i<n_ISForEdges;i++) {
2189       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2190     }
2191     if (n_ISForEdges) {
2192       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2193     }
2194     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2195   } else {
2196     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2197     PetscInt        cum = 0;
2198 
2199     total_counts = 0;
2200     n_vertices = 0;
2201     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2202       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2203     }
2204     max_constraints = 0;
2205     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2206       total_counts += pcbddc->adaptive_constraints_n[i];
2207       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2208     }
2209     temp_indices = pcbddc->adaptive_constraints_ptrs;
2210     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2211     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2212 
2213 #if 0
2214     printf("Found %d totals\n",total_counts);
2215     for (i=0;i<total_counts;i++) {
2216       printf("const %d, start %d",i,temp_indices[i]);
2217       printf(" end %d:\n",temp_indices[i+1]);
2218       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2219         printf("  idxs %d",temp_indices_to_constraint[j]);
2220         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2221       }
2222     }
2223     for (i=0;i<n_vertices;i++) {
2224       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2225     }
2226     for (i=0;i<sub_schurs->n_subs;i++) {
2227       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2228     }
2229 #endif
2230 
2231     max_size_of_constraint = 0;
2232     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2233     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2234     /* Change of basis */
2235     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2236     if (pcbddc->use_change_of_basis) {
2237       cum = n_vertices;
2238       for (i=0;i<sub_schurs->n_subs;i++) {
2239         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2240           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2241             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2242           }
2243         }
2244         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2245       }
2246     }
2247   }
2248 
2249   /* map temp_indices_to_constraint in boundary numbering */
2250   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2251   if (i != temp_indices[total_counts]) {
2252     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2253   }
2254 
2255   /* set quantities in pcbddc data structure and store previous primal size */
2256   /* n_vertices defines the number of subdomain corners in the primal space */
2257   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2258   pcbddc->local_primal_size = total_counts;
2259   pcbddc->n_vertices = n_vertices;
2260   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2261 
2262   /* Create constraint matrix */
2263   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2264   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2265   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2266   /* array to compute a local numbering of constraints : vertices first then constraints */
2267   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2268   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2269 
2270   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2271   total_primal_vertices=0;
2272   for (i=0;i<pcbddc->local_primal_size;i++) {
2273     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2274     if (size_of_constraint == 1) {
2275       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2276       pcbddc->primal_indices_local_idxs[total_primal_vertices++]=temp_indices_to_constraint[temp_indices[i]];
2277     } else if (PetscBTLookup(change_basis,i)) {
2278       /* find first (by global ordering) untouched node in the cc */
2279       k = 0;
2280       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2281       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2282       pcbddc->primal_indices_local_idxs[total_primal_vertices++]=temp_indices_to_constraint[temp_indices[i]+k];
2283     }
2284   }
2285   pcbddc->n_actual_vertices = total_primal_vertices;
2286   /* permute indices in order to have a sorted set of vertices */
2287   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2288 
2289   /* determine if a QR strategy is needed for change of basis */
2290   qr_needed = PETSC_FALSE;
2291   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2292   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2293     if (PetscBTLookup(change_basis,i)) {
2294       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2295         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2296         j = 0;
2297         for (k=0;k<size_of_constraint;k++) {
2298           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2299             j++;
2300           }
2301         }
2302         /* found more than one primal dof on the cc */
2303         if (j > 1) {
2304           PetscBTSet(qr_needed_idx,i);
2305           qr_needed = PETSC_TRUE;
2306         }
2307       } else {
2308         PetscBTSet(qr_needed_idx,i);
2309         qr_needed = PETSC_TRUE;
2310       }
2311     }
2312   }
2313 
2314   /* get reference dof for local constraints */
2315   total_counts = total_primal_vertices;
2316   for (i=0;i<pcbddc->local_primal_size;i++) {
2317     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2318     if (size_of_constraint != 1 && !PetscBTLookup(change_basis,i)) {
2319       /* find first (by global ordering) untouched node in the cc */
2320       k = 0;
2321       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2322       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2323       pcbddc->primal_indices_local_idxs[total_counts++]=temp_indices_to_constraint[temp_indices[i]+k];
2324     }
2325   }
2326 
2327   /* nonzero structure of constraint matrix */
2328   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2329   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2330   j=total_primal_vertices;
2331   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2332     if (!PetscBTLookup(change_basis,i)) {
2333       nnz[j]=temp_indices[i+1]-temp_indices[i];
2334       j++;
2335     }
2336   }
2337   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2338   ierr = PetscFree(nnz);CHKERRQ(ierr);
2339 
2340   /* set values in constraint matrix */
2341   for (i=0;i<total_primal_vertices;i++) {
2342     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2343   }
2344   total_counts = total_primal_vertices;
2345   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2346     if (!PetscBTLookup(change_basis,i)) {
2347       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2348       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2349       total_counts++;
2350     }
2351   }
2352   /* assembling */
2353   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2354   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2355 
2356   /*
2357   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2358   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2359   */
2360   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2361   if (pcbddc->use_change_of_basis) {
2362     /* dual and primal dofs on a single cc */
2363     PetscInt     dual_dofs,primal_dofs;
2364     /* working stuff for GEQRF */
2365     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2366     PetscBLASInt lqr_work;
2367     /* working stuff for UNGQR */
2368     PetscScalar  *gqr_work,lgqr_work_t;
2369     PetscBLASInt lgqr_work;
2370     /* working stuff for TRTRS */
2371     PetscScalar  *trs_rhs;
2372     PetscBLASInt Blas_NRHS;
2373     /* pointers for values insertion into change of basis matrix */
2374     PetscInt     *start_rows,*start_cols;
2375     PetscScalar  *start_vals;
2376     /* working stuff for values insertion */
2377     PetscBT      is_primal;
2378     PetscInt     *aux_primal_numbering_B;
2379     /* matrix sizes */
2380     PetscInt     global_size,local_size;
2381     /* temporary change of basis */
2382     Mat          localChangeOfBasisMatrix;
2383     /* extra space for debugging */
2384     PetscScalar  *dbg_work;
2385 
2386     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2387     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2388     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2389     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2390     /* nonzeros for local mat */
2391     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2392     for (i=0;i<pcis->n;i++) nnz[i]=1;
2393     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2394       if (PetscBTLookup(change_basis,i)) {
2395         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2396         if (PetscBTLookup(qr_needed_idx,i)) {
2397           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2398         } else {
2399           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2400           /* get local primal index on the cc */
2401           j = 0;
2402           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2403           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2404         }
2405       }
2406     }
2407     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2408     ierr = PetscFree(nnz);CHKERRQ(ierr);
2409     /* Set initial identity in the matrix */
2410     for (i=0;i<pcis->n;i++) {
2411       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2412     }
2413 
2414     if (pcbddc->dbg_flag) {
2415       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2416       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2417     }
2418 
2419 
2420     /* Now we loop on the constraints which need a change of basis */
2421     /*
2422        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2423        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2424 
2425        Basic blocks of change of basis matrix T computed by
2426 
2427           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2428 
2429             | 1        0   ...        0         s_1/S |
2430             | 0        1   ...        0         s_2/S |
2431             |              ...                        |
2432             | 0        ...            1     s_{n-1}/S |
2433             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2434 
2435             with S = \sum_{i=1}^n s_i^2
2436             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2437                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2438 
2439           - QR decomposition of constraints otherwise
2440     */
2441     if (qr_needed) {
2442       /* space to store Q */
2443       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2444       /* first we issue queries for optimal work */
2445       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2446       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2447       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2448       lqr_work = -1;
2449       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2450       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2451       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2452       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2453       lgqr_work = -1;
2454       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2455       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2456       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2457       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2458       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2459       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2460       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2461       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2462       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2463       /* array to store scaling factors for reflectors */
2464       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2465       /* array to store rhs and solution of triangular solver */
2466       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2467       /* allocating workspace for check */
2468       if (pcbddc->dbg_flag) {
2469         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2470       }
2471     }
2472     /* array to store whether a node is primal or not */
2473     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2474     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2475     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->primal_indices_local_idxs,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2476     if (i != total_primal_vertices) {
2477       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2478     }
2479     for (i=0;i<total_primal_vertices;i++) {
2480       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2481     }
2482     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2483 
2484     /* loop on constraints and see whether or not they need a change of basis and compute it */
2485     /* -> using implicit ordering contained in temp_indices data */
2486     total_counts = pcbddc->n_vertices;
2487     while (total_counts<pcbddc->local_primal_size) {
2488       primal_dofs = 1;
2489       if (PetscBTLookup(change_basis,total_counts)) {
2490         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2491         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2492           primal_dofs++;
2493         }
2494         /* get constraint info */
2495         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2496         dual_dofs = size_of_constraint-primal_dofs;
2497 
2498         if (pcbddc->dbg_flag) {
2499           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2500         }
2501 
2502         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2503 
2504           /* copy quadrature constraints for change of basis check */
2505           if (pcbddc->dbg_flag) {
2506             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2507           }
2508           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2509           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2510 
2511           /* compute QR decomposition of constraints */
2512           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2513           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2515           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2516           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2517           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2518           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2519 
2520           /* explictly compute R^-T */
2521           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2522           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2523           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2527           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2528           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2529           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2530           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2531 
2532           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2533           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2537           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2538           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2539           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2540           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2541 
2542           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2543              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2544              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2545           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2546           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2547           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2548           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2549           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2550           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2551           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2552           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2553           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2554           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2555 
2556           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2557           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2558           /* insert cols for primal dofs */
2559           for (j=0;j<primal_dofs;j++) {
2560             start_vals = &qr_basis[j*size_of_constraint];
2561             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+j];
2562             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2563           }
2564           /* insert cols for dual dofs */
2565           for (j=0,k=0;j<dual_dofs;k++) {
2566             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2567               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2568               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2569               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2570               j++;
2571             }
2572           }
2573 
2574           /* check change of basis */
2575           if (pcbddc->dbg_flag) {
2576             PetscInt   ii,jj;
2577             PetscBool valid_qr=PETSC_TRUE;
2578             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2579             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2580             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2581             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2582             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2583             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2584             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2585             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2586             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2587             for (jj=0;jj<size_of_constraint;jj++) {
2588               for (ii=0;ii<primal_dofs;ii++) {
2589                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2590                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2591               }
2592             }
2593             if (!valid_qr) {
2594               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2595               for (jj=0;jj<size_of_constraint;jj++) {
2596                 for (ii=0;ii<primal_dofs;ii++) {
2597                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2598                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2599                   }
2600                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2601                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2602                   }
2603                 }
2604               }
2605             } else {
2606               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2607             }
2608           }
2609         } else { /* simple transformation block */
2610           PetscInt    row,col;
2611           PetscScalar val,norm;
2612 
2613           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2614           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2615           for (j=0;j<size_of_constraint;j++) {
2616             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2617             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2618             if (!PetscBTLookup(is_primal,row_B)) {
2619               col = temp_indices_to_constraint[temp_indices[total_counts]];
2620               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2621               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2622             } else {
2623               for (k=0;k<size_of_constraint;k++) {
2624                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2625                 if (row != col) {
2626                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]];
2627                 } else {
2628                   val = temp_quadrature_constraint[temp_indices[total_counts]]/norm;
2629                 }
2630                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2631               }
2632             }
2633           }
2634           if (pcbddc->dbg_flag) {
2635             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2636           }
2637         }
2638       } else {
2639         if (pcbddc->dbg_flag) {
2640           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2641         }
2642       }
2643       /* increment constraint counter total_counts */
2644       total_counts += primal_dofs;
2645     }
2646 
2647     /* free workspace */
2648     if (qr_needed) {
2649       if (pcbddc->dbg_flag) {
2650         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2651       }
2652       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2653       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2654       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2655       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2656       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2657     }
2658     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2659     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661 
2662     /* assembling of global change of variable */
2663     {
2664       Mat      tmat;
2665       PetscInt bs;
2666 
2667       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2668       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2669       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2670       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2671       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2672       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2673       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2674       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2675       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2676       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2677       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2678       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2679       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2680       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2681       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2682       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2683       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2684       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2685     }
2686     /* check */
2687     if (pcbddc->dbg_flag) {
2688       PetscReal error;
2689       Vec       x,x_change;
2690 
2691       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2692       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2693       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2694       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2695       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2696       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2697       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2698       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2699       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2700       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2701       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2702       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2703       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2704       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2705       ierr = VecDestroy(&x);CHKERRQ(ierr);
2706       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2707     }
2708 
2709     /* adapt sub_schurs computed (if any) */
2710     if (pcbddc->use_deluxe_scaling) {
2711       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2712       if (sub_schurs->S_Ej_all) {
2713         Mat S_new,tmat;
2714         IS is_all_N;
2715 
2716         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2717         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2718         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2719         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2720         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2721         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2722         sub_schurs->S_Ej_all = S_new;
2723         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2724         if (sub_schurs->sum_S_Ej_all) {
2725           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2726           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2727           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2728           sub_schurs->sum_S_Ej_all = S_new;
2729           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2730         }
2731         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2732       }
2733     }
2734     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2735   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2736     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2737     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2738   }
2739 
2740   /* set up change of basis context */
2741   if (pcbddc->ChangeOfBasisMatrix) {
2742     PCBDDCChange_ctx change_ctx;
2743 
2744     if (!pcbddc->new_global_mat) {
2745       PetscInt global_size,local_size;
2746 
2747       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2748       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2749       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2750       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2751       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2752       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2753       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2754       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2755       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2756     } else {
2757       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2758       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2759       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2760     }
2761     if (!pcbddc->user_ChangeOfBasisMatrix) {
2762       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2763       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2764     } else {
2765       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2766       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2767     }
2768     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2769     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2770     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2771     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772   }
2773 
2774   /* check if a new primal space has been introduced */
2775   pcbddc->new_primal_space_local = PETSC_TRUE;
2776   if (olocal_primal_size == pcbddc->local_primal_size) {
2777     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2778     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2779   }
2780   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2781   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2782   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2783 
2784   /* flush dbg viewer */
2785   if (pcbddc->dbg_flag) {
2786     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2787   }
2788 
2789   /* free workspace */
2790   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2791   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2792   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2793   if (!pcbddc->adaptive_selection) {
2794     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2795     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2796   } else {
2797     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2798                       pcbddc->adaptive_constraints_ptrs,
2799                       pcbddc->adaptive_constraints_idxs,
2800                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2801     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2802   }
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2808 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2809 {
2810   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2811   PC_IS       *pcis = (PC_IS*)pc->data;
2812   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2813   PetscInt    ierr,i,vertex_size,N;
2814   PetscViewer viewer=pcbddc->dbg_viewer;
2815 
2816   PetscFunctionBegin;
2817   /* Reset previously computed graph */
2818   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2819   /* Init local Graph struct */
2820   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2821   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2822 
2823   /* Check validity of the csr graph passed in by the user */
2824   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2825     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2826   }
2827 
2828   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2829   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2830     PetscInt  *xadj,*adjncy;
2831     PetscInt  nvtxs;
2832     PetscBool flg_row=PETSC_FALSE;
2833 
2834     if (pcbddc->use_local_adj) {
2835 
2836       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2837       if (flg_row) {
2838         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2839         pcbddc->computed_rowadj = PETSC_TRUE;
2840       }
2841       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2842     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2843       IS                     is_dummy;
2844       ISLocalToGlobalMapping l2gmap_dummy;
2845       PetscInt               j,sum;
2846       PetscInt               *cxadj,*cadjncy;
2847       const PetscInt         *idxs;
2848       PCBDDCGraph            graph;
2849       PetscBT                is_on_boundary;
2850 
2851       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2852       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2853       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2854       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2855       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2856       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2858       if (flg_row) {
2859         graph->xadj = xadj;
2860         graph->adjncy = adjncy;
2861       }
2862       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2863       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2864       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2865 
2866       if (pcbddc->dbg_flag) {
2867         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2868         for (i=0;i<graph->ncc;i++) {
2869           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2870         }
2871       }
2872 
2873       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2874       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2875       for (i=0;i<pcis->n_B;i++) {
2876         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2877       }
2878       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2879 
2880       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2881       sum = 0;
2882       for (i=0;i<graph->ncc;i++) {
2883         PetscInt sizecc = 0;
2884         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2885           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2886             sizecc++;
2887           }
2888         }
2889         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2890           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2891             cxadj[graph->queue[j]] = sizecc;
2892           }
2893         }
2894         sum += sizecc*sizecc;
2895       }
2896       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2897       sum = 0;
2898       for (i=0;i<pcis->n;i++) {
2899         PetscInt temp = cxadj[i];
2900         cxadj[i] = sum;
2901         sum += temp;
2902       }
2903       cxadj[pcis->n] = sum;
2904       for (i=0;i<graph->ncc;i++) {
2905         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2906           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2907             PetscInt k,sizecc = 0;
2908             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2909               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2910                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2911                 sizecc++;
2912               }
2913             }
2914           }
2915         }
2916       }
2917       if (sum) {
2918         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2919       } else {
2920         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2921         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2922       }
2923       graph->xadj = 0;
2924       graph->adjncy = 0;
2925       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2926       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2927     }
2928   }
2929   if (pcbddc->dbg_flag) {
2930     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2931   }
2932 
2933   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2934   vertex_size = 1;
2935   if (pcbddc->user_provided_isfordofs) {
2936     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2937       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2938       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2939         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2940         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2941       }
2942       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2943       pcbddc->n_ISForDofs = 0;
2944       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2945     }
2946     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2947     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2948   } else {
2949     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2950       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2951       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2952       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2953         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2954       }
2955     }
2956   }
2957 
2958   /* Setup of Graph */
2959   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2960     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2961   }
2962   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2963     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2964   }
2965   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
2966 
2967   /* Graph's connected components analysis */
2968   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2969 
2970   /* print some info to stdout */
2971   if (pcbddc->dbg_flag) {
2972     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
2973   }
2974 
2975   /* mark topography has done */
2976   pcbddc->recompute_topography = PETSC_FALSE;
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 #undef __FUNCT__
2981 #define __FUNCT__ "PCBDDCSubsetNumbering"
2982 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2983 {
2984   Vec            local_vec,global_vec;
2985   IS             seqis,paris;
2986   VecScatter     scatter_ctx;
2987   PetscScalar    *array;
2988   PetscInt       *temp_global_dofs;
2989   PetscScalar    globalsum;
2990   PetscInt       i,j,s;
2991   PetscInt       nlocals,first_index,old_index,max_local,max_global;
2992   PetscMPIInt    rank_prec_comm,size_prec_comm;
2993   PetscInt       *dof_sizes,*dof_displs;
2994   PetscBool      first_found;
2995   PetscErrorCode ierr;
2996 
2997   PetscFunctionBegin;
2998   /* mpi buffers */
2999   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3000   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3001   j = ( !rank_prec_comm ? size_prec_comm : 0);
3002   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3003   /* get maximum size of subset */
3004   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3005   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3006   max_local = 0;
3007   for (i=0;i<n_local_dofs;i++) {
3008     if (max_local < temp_global_dofs[i] ) {
3009       max_local = temp_global_dofs[i];
3010     }
3011   }
3012   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3013   max_global++;
3014   max_local = 0;
3015   for (i=0;i<n_local_dofs;i++) {
3016     if (max_local < local_dofs[i] ) {
3017       max_local = local_dofs[i];
3018     }
3019   }
3020   max_local++;
3021   /* allocate workspace */
3022   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3023   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3024   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3025   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3026   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3027   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3028   /* create scatter */
3029   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3030   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3031   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3032   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3033   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3034   /* init array */
3035   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3036   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3037   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3038   if (local_dofs_mult) {
3039     for (i=0;i<n_local_dofs;i++) {
3040       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3041     }
3042   } else {
3043     for (i=0;i<n_local_dofs;i++) {
3044       array[local_dofs[i]]=1.0;
3045     }
3046   }
3047   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3048   /* scatter into global vec and get total number of global dofs */
3049   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3050   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3051   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3052   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3053   /* Fill global_vec with cumulative function for global numbering */
3054   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3055   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3056   nlocals = 0;
3057   first_index = -1;
3058   first_found = PETSC_FALSE;
3059   for (i=0;i<s;i++) {
3060     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3061       first_found = PETSC_TRUE;
3062       first_index = i;
3063     }
3064     nlocals += (PetscInt)PetscRealPart(array[i]);
3065   }
3066   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3067   if (!rank_prec_comm) {
3068     dof_displs[0]=0;
3069     for (i=1;i<size_prec_comm;i++) {
3070       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3071     }
3072   }
3073   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3074   if (first_found) {
3075     array[first_index] += (PetscScalar)nlocals;
3076     old_index = first_index;
3077     for (i=first_index+1;i<s;i++) {
3078       if (PetscRealPart(array[i]) > 0.1) {
3079         array[i] += array[old_index];
3080         old_index = i;
3081       }
3082     }
3083   }
3084   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3085   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3086   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3087   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3088   /* get global ordering of local dofs */
3089   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3090   if (local_dofs_mult) {
3091     for (i=0;i<n_local_dofs;i++) {
3092       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3093     }
3094   } else {
3095     for (i=0;i<n_local_dofs;i++) {
3096       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3097     }
3098   }
3099   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3100   /* free workspace */
3101   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3102   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3103   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3104   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3105   /* return pointer to global ordering of local dofs */
3106   *global_numbering_subset = temp_global_dofs;
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 #undef __FUNCT__
3111 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3112 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3113 {
3114   PetscInt       i,j;
3115   PetscScalar    *alphas;
3116   PetscErrorCode ierr;
3117 
3118   PetscFunctionBegin;
3119   /* this implements stabilized Gram-Schmidt */
3120   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3121   for (i=0;i<n;i++) {
3122     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3123     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3124     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3125   }
3126   ierr = PetscFree(alphas);CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 
3130 #undef __FUNCT__
3131 #define __FUNCT__ "MatISGetSubassemblingPattern"
3132 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3133 {
3134   Mat             subdomain_adj;
3135   IS              new_ranks,ranks_send_to;
3136   MatPartitioning partitioner;
3137   Mat_IS          *matis;
3138   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3139   PetscInt        prank;
3140   PetscMPIInt     size,rank,color;
3141   PetscInt        *xadj,*adjncy,*oldranks;
3142   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3143   PetscInt        i,local_size,threshold=0;
3144   PetscErrorCode  ierr;
3145   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3146   PetscSubcomm    subcomm;
3147 
3148   PetscFunctionBegin;
3149   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3150   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3151   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3152 
3153   /* Get info on mapping */
3154   matis = (Mat_IS*)(mat->data);
3155   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3156   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3157 
3158   /* build local CSR graph of subdomains' connectivity */
3159   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3160   xadj[0] = 0;
3161   xadj[1] = PetscMax(n_neighs-1,0);
3162   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3163   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3164 
3165   if (threshold) {
3166     PetscInt xadj_count = 0;
3167     for (i=1;i<n_neighs;i++) {
3168       if (n_shared[i] > threshold) {
3169         adjncy[xadj_count] = neighs[i];
3170         adjncy_wgt[xadj_count] = n_shared[i];
3171         xadj_count++;
3172       }
3173     }
3174     xadj[1] = xadj_count;
3175   } else {
3176     if (xadj[1]) {
3177       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3178       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3179     }
3180   }
3181   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3182   if (use_square) {
3183     for (i=0;i<xadj[1];i++) {
3184       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3185     }
3186   }
3187   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3188 
3189   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3190 
3191   /*
3192     Restrict work on active processes only.
3193   */
3194   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3195   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3196   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3197   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3198   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3199   if (color) {
3200     ierr = PetscFree(xadj);CHKERRQ(ierr);
3201     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3202     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3203   } else {
3204     PetscInt coarsening_ratio;
3205     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3206     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3207     prank = rank;
3208     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3209     /*
3210     for (i=0;i<size;i++) {
3211       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3212     }
3213     */
3214     for (i=0;i<xadj[1];i++) {
3215       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3216     }
3217     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3218     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3219     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3220 
3221     /* Partition */
3222     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3223     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3224     if (use_vwgt) {
3225       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3226       v_wgt[0] = local_size;
3227       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3228     }
3229     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3230     coarsening_ratio = size/n_subdomains;
3231     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3232     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3233     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3234     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3235 
3236     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3237     if (contiguous) {
3238       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3239     } else {
3240       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3241     }
3242     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3243     /* clean up */
3244     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3245     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3246     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3247     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3248   }
3249   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3250 
3251   /* assemble parallel IS for sends */
3252   i = 1;
3253   if (color) i=0;
3254   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3255 
3256   /* get back IS */
3257   *is_sends = ranks_send_to;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3262 
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatISSubassemble"
3265 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3266 {
3267   Mat                    local_mat;
3268   Mat_IS                 *matis;
3269   IS                     is_sends_internal;
3270   PetscInt               rows,cols,new_local_rows;
3271   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3272   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3273   ISLocalToGlobalMapping l2gmap;
3274   PetscInt*              l2gmap_indices;
3275   const PetscInt*        is_indices;
3276   MatType                new_local_type;
3277   /* buffers */
3278   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3279   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3280   PetscInt               *recv_buffer_idxs_local;
3281   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3282   /* MPI */
3283   MPI_Comm               comm,comm_n;
3284   PetscSubcomm           subcomm;
3285   PetscMPIInt            n_sends,n_recvs,commsize;
3286   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3287   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3288   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3289   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3290   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3291   PetscErrorCode         ierr;
3292 
3293   PetscFunctionBegin;
3294   /* TODO: add missing checks */
3295   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3296   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3297   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3298   PetscValidLogicalCollectiveInt(mat,nis,7);
3299   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3300   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3301   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3302   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3303   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3304   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3305   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3306   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3307     PetscInt mrows,mcols,mnrows,mncols;
3308     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3309     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3310     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3311     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3312     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3313     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3314   }
3315   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3316   PetscValidLogicalCollectiveInt(mat,bs,0);
3317   /* prepare IS for sending if not provided */
3318   if (!is_sends) {
3319     PetscBool pcontig = PETSC_TRUE;
3320     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3321     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3322   } else {
3323     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3324     is_sends_internal = is_sends;
3325   }
3326 
3327   /* get pointer of MATIS data */
3328   matis = (Mat_IS*)mat->data;
3329 
3330   /* get comm */
3331   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3332 
3333   /* compute number of sends */
3334   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3335   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3336 
3337   /* compute number of receives */
3338   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3339   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3340   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3341   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3342   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3343   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3344   ierr = PetscFree(iflags);CHKERRQ(ierr);
3345 
3346   /* restrict comm if requested */
3347   subcomm = 0;
3348   destroy_mat = PETSC_FALSE;
3349   if (restrict_comm) {
3350     PetscMPIInt color,subcommsize;
3351 
3352     color = 0;
3353     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3354     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3355     subcommsize = commsize - subcommsize;
3356     /* check if reuse has been requested */
3357     if (reuse == MAT_REUSE_MATRIX) {
3358       if (*mat_n) {
3359         PetscMPIInt subcommsize2;
3360         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3361         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3362         comm_n = PetscObjectComm((PetscObject)*mat_n);
3363       } else {
3364         comm_n = PETSC_COMM_SELF;
3365       }
3366     } else { /* MAT_INITIAL_MATRIX */
3367       PetscMPIInt rank;
3368 
3369       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3370       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3371       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3372       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3373       comm_n = PetscSubcommChild(subcomm);
3374     }
3375     /* flag to destroy *mat_n if not significative */
3376     if (color) destroy_mat = PETSC_TRUE;
3377   } else {
3378     comm_n = comm;
3379   }
3380 
3381   /* prepare send/receive buffers */
3382   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3383   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3384   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3385   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3386   if (nis) {
3387     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3388   }
3389 
3390   /* Get data from local matrices */
3391   if (!isdense) {
3392     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3393     /* TODO: See below some guidelines on how to prepare the local buffers */
3394     /*
3395        send_buffer_vals should contain the raw values of the local matrix
3396        send_buffer_idxs should contain:
3397        - MatType_PRIVATE type
3398        - PetscInt        size_of_l2gmap
3399        - PetscInt        global_row_indices[size_of_l2gmap]
3400        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3401     */
3402   } else {
3403     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3404     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3405     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3406     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3407     send_buffer_idxs[1] = i;
3408     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3409     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3410     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3411     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3412     for (i=0;i<n_sends;i++) {
3413       ilengths_vals[is_indices[i]] = len*len;
3414       ilengths_idxs[is_indices[i]] = len+2;
3415     }
3416   }
3417   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3418   /* additional is (if any) */
3419   if (nis) {
3420     PetscMPIInt psum;
3421     PetscInt j;
3422     for (j=0,psum=0;j<nis;j++) {
3423       PetscInt plen;
3424       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3425       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3426       psum += len+1; /* indices + lenght */
3427     }
3428     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3429     for (j=0,psum=0;j<nis;j++) {
3430       PetscInt plen;
3431       const PetscInt *is_array_idxs;
3432       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3433       send_buffer_idxs_is[psum] = plen;
3434       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3435       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3436       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3437       psum += plen+1; /* indices + lenght */
3438     }
3439     for (i=0;i<n_sends;i++) {
3440       ilengths_idxs_is[is_indices[i]] = psum;
3441     }
3442     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3443   }
3444 
3445   buf_size_idxs = 0;
3446   buf_size_vals = 0;
3447   buf_size_idxs_is = 0;
3448   for (i=0;i<n_recvs;i++) {
3449     buf_size_idxs += (PetscInt)olengths_idxs[i];
3450     buf_size_vals += (PetscInt)olengths_vals[i];
3451     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3452   }
3453   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3454   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3455   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3456 
3457   /* get new tags for clean communications */
3458   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3459   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3460   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3461 
3462   /* allocate for requests */
3463   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3464   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3465   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3466   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3467   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3468   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3469 
3470   /* communications */
3471   ptr_idxs = recv_buffer_idxs;
3472   ptr_vals = recv_buffer_vals;
3473   ptr_idxs_is = recv_buffer_idxs_is;
3474   for (i=0;i<n_recvs;i++) {
3475     source_dest = onodes[i];
3476     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3477     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3478     ptr_idxs += olengths_idxs[i];
3479     ptr_vals += olengths_vals[i];
3480     if (nis) {
3481       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3482       ptr_idxs_is += olengths_idxs_is[i];
3483     }
3484   }
3485   for (i=0;i<n_sends;i++) {
3486     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3487     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3488     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3489     if (nis) {
3490       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3491     }
3492   }
3493   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3494   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3495 
3496   /* assemble new l2g map */
3497   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3498   ptr_idxs = recv_buffer_idxs;
3499   new_local_rows = 0;
3500   for (i=0;i<n_recvs;i++) {
3501     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3502     ptr_idxs += olengths_idxs[i];
3503   }
3504   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3505   ptr_idxs = recv_buffer_idxs;
3506   new_local_rows = 0;
3507   for (i=0;i<n_recvs;i++) {
3508     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3509     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3510     ptr_idxs += olengths_idxs[i];
3511   }
3512   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3513   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3514   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3515 
3516   /* infer new local matrix type from received local matrices type */
3517   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3518   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3519   if (n_recvs) {
3520     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3521     ptr_idxs = recv_buffer_idxs;
3522     for (i=0;i<n_recvs;i++) {
3523       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3524         new_local_type_private = MATAIJ_PRIVATE;
3525         break;
3526       }
3527       ptr_idxs += olengths_idxs[i];
3528     }
3529     switch (new_local_type_private) {
3530       case MATDENSE_PRIVATE:
3531         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3532           new_local_type = MATSEQAIJ;
3533           bs = 1;
3534         } else { /* if I receive only 1 dense matrix */
3535           new_local_type = MATSEQDENSE;
3536           bs = 1;
3537         }
3538         break;
3539       case MATAIJ_PRIVATE:
3540         new_local_type = MATSEQAIJ;
3541         bs = 1;
3542         break;
3543       case MATBAIJ_PRIVATE:
3544         new_local_type = MATSEQBAIJ;
3545         break;
3546       case MATSBAIJ_PRIVATE:
3547         new_local_type = MATSEQSBAIJ;
3548         break;
3549       default:
3550         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3551         break;
3552     }
3553   } else { /* by default, new_local_type is seqdense */
3554     new_local_type = MATSEQDENSE;
3555     bs = 1;
3556   }
3557 
3558   /* create MATIS object if needed */
3559   if (reuse == MAT_INITIAL_MATRIX) {
3560     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3561     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3562   } else {
3563     /* it also destroys the local matrices */
3564     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3565   }
3566   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3567   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3568 
3569   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3570 
3571   /* Global to local map of received indices */
3572   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3573   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3574   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3575 
3576   /* restore attributes -> type of incoming data and its size */
3577   buf_size_idxs = 0;
3578   for (i=0;i<n_recvs;i++) {
3579     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3580     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3581     buf_size_idxs += (PetscInt)olengths_idxs[i];
3582   }
3583   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3584 
3585   /* set preallocation */
3586   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3587   if (!newisdense) {
3588     PetscInt *new_local_nnz=0;
3589 
3590     ptr_vals = recv_buffer_vals;
3591     ptr_idxs = recv_buffer_idxs_local;
3592     if (n_recvs) {
3593       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3594     }
3595     for (i=0;i<n_recvs;i++) {
3596       PetscInt j;
3597       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3598         for (j=0;j<*(ptr_idxs+1);j++) {
3599           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3600         }
3601       } else {
3602         /* TODO */
3603       }
3604       ptr_idxs += olengths_idxs[i];
3605     }
3606     if (new_local_nnz) {
3607       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3608       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3609       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3610       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3611       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3612       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3613     } else {
3614       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3615     }
3616     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3617   } else {
3618     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3619   }
3620 
3621   /* set values */
3622   ptr_vals = recv_buffer_vals;
3623   ptr_idxs = recv_buffer_idxs_local;
3624   for (i=0;i<n_recvs;i++) {
3625     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3626       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3627       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3628       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3629       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3630       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3631     } else {
3632       /* TODO */
3633     }
3634     ptr_idxs += olengths_idxs[i];
3635     ptr_vals += olengths_vals[i];
3636   }
3637   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3638   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3642   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3643 
3644 #if 0
3645   if (!restrict_comm) { /* check */
3646     Vec       lvec,rvec;
3647     PetscReal infty_error;
3648 
3649     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3650     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3651     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3652     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3653     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3654     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3655     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3656     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3657     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3658   }
3659 #endif
3660 
3661   /* assemble new additional is (if any) */
3662   if (nis) {
3663     PetscInt **temp_idxs,*count_is,j,psum;
3664 
3665     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3666     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3667     ptr_idxs = recv_buffer_idxs_is;
3668     psum = 0;
3669     for (i=0;i<n_recvs;i++) {
3670       for (j=0;j<nis;j++) {
3671         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3672         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3673         psum += plen;
3674         ptr_idxs += plen+1; /* shift pointer to received data */
3675       }
3676     }
3677     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3678     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3679     for (i=1;i<nis;i++) {
3680       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3681     }
3682     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3683     ptr_idxs = recv_buffer_idxs_is;
3684     for (i=0;i<n_recvs;i++) {
3685       for (j=0;j<nis;j++) {
3686         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3687         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3688         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3689         ptr_idxs += plen+1; /* shift pointer to received data */
3690       }
3691     }
3692     for (i=0;i<nis;i++) {
3693       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3694       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3695       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3696     }
3697     ierr = PetscFree(count_is);CHKERRQ(ierr);
3698     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3699     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3700   }
3701   /* free workspace */
3702   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3703   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3704   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3705   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3706   if (isdense) {
3707     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3708     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3709   } else {
3710     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3711   }
3712   if (nis) {
3713     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3714     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3715   }
3716   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3717   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3718   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3719   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3720   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3721   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3722   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3723   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3724   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3725   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3726   ierr = PetscFree(onodes);CHKERRQ(ierr);
3727   if (nis) {
3728     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3729     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3730     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3731   }
3732   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3733   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3734     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3735     for (i=0;i<nis;i++) {
3736       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3737     }
3738   }
3739   PetscFunctionReturn(0);
3740 }
3741 
3742 /* temporary hack into ksp private data structure */
3743 #include <petsc/private/kspimpl.h>
3744 
3745 #undef __FUNCT__
3746 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3747 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3748 {
3749   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3750   PC_IS                  *pcis = (PC_IS*)pc->data;
3751   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3752   MatNullSpace           CoarseNullSpace=NULL;
3753   ISLocalToGlobalMapping coarse_islg;
3754   IS                     coarse_is,*isarray;
3755   PetscInt               i,im_active=-1,active_procs=-1;
3756   PetscInt               nis,nisdofs,nisneu;
3757   PC                     pc_temp;
3758   PCType                 coarse_pc_type;
3759   KSPType                coarse_ksp_type;
3760   PetscBool              multilevel_requested,multilevel_allowed;
3761   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3762   Mat                    t_coarse_mat_is;
3763   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3764   PetscMPIInt            all_procs;
3765   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3766   PetscBool              compute_vecs = PETSC_FALSE;
3767   PetscScalar            *array;
3768   PetscErrorCode         ierr;
3769 
3770   PetscFunctionBegin;
3771   /* Assign global numbering to coarse dofs */
3772   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3773     PetscInt ocoarse_size;
3774     compute_vecs = PETSC_TRUE;
3775     ocoarse_size = pcbddc->coarse_size;
3776     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3777     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3778     /* see if we can avoid some work */
3779     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3780       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3781         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3782         coarse_reuse = PETSC_FALSE;
3783       } else { /* we can safely reuse already computed coarse matrix */
3784         coarse_reuse = PETSC_TRUE;
3785       }
3786     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3787       coarse_reuse = PETSC_FALSE;
3788     }
3789     /* reset any subassembling information */
3790     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3791     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3792   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3793     coarse_reuse = PETSC_TRUE;
3794   }
3795 
3796   /* count "active" (i.e. with positive local size) and "void" processes */
3797   im_active = !!(pcis->n);
3798   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3799   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3800   void_procs = all_procs-active_procs;
3801   csin_type_simple = PETSC_TRUE;
3802   redist = PETSC_FALSE;
3803   if (pcbddc->current_level && void_procs) {
3804     csin_ml = PETSC_TRUE;
3805     ncoarse_ml = void_procs;
3806     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3807     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3808       csin_ds = PETSC_TRUE;
3809       ncoarse_ds = pcbddc->redistribute_coarse;
3810       redist = PETSC_TRUE;
3811     } else {
3812       csin_ds = PETSC_TRUE;
3813       ncoarse_ds = active_procs;
3814       redist = PETSC_TRUE;
3815     }
3816   } else {
3817     csin_ml = PETSC_FALSE;
3818     ncoarse_ml = all_procs;
3819     if (void_procs) {
3820       csin_ds = PETSC_TRUE;
3821       ncoarse_ds = void_procs;
3822       csin_type_simple = PETSC_FALSE;
3823     } else {
3824       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3825         csin_ds = PETSC_TRUE;
3826         ncoarse_ds = pcbddc->redistribute_coarse;
3827         redist = PETSC_TRUE;
3828       } else {
3829         csin_ds = PETSC_FALSE;
3830         ncoarse_ds = all_procs;
3831       }
3832     }
3833   }
3834 
3835   /*
3836     test if we can go multilevel: three conditions must be satisfied:
3837     - we have not exceeded the number of levels requested
3838     - we can actually subassemble the active processes
3839     - we can find a suitable number of MPI processes where we can place the subassembled problem
3840   */
3841   multilevel_allowed = PETSC_FALSE;
3842   multilevel_requested = PETSC_FALSE;
3843   if (pcbddc->current_level < pcbddc->max_levels) {
3844     multilevel_requested = PETSC_TRUE;
3845     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3846       multilevel_allowed = PETSC_FALSE;
3847     } else {
3848       multilevel_allowed = PETSC_TRUE;
3849     }
3850   }
3851   /* determine number of process partecipating to coarse solver */
3852   if (multilevel_allowed) {
3853     ncoarse = ncoarse_ml;
3854     csin = csin_ml;
3855     redist = PETSC_FALSE;
3856   } else {
3857     ncoarse = ncoarse_ds;
3858     csin = csin_ds;
3859   }
3860 
3861   /* creates temporary l2gmap and IS for coarse indexes */
3862   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3863   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3864 
3865   /* creates temporary MATIS object for coarse matrix */
3866   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3867   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3868   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3869   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3870 #if 0
3871   {
3872     PetscViewer viewer;
3873     char filename[256];
3874     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3875     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3876     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3877     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3878     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3879   }
3880 #endif
3881   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3882   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3883   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3885   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3886 
3887   /* compute dofs splitting and neumann boundaries for coarse dofs */
3888   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3889     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3890     const PetscInt         *idxs;
3891     ISLocalToGlobalMapping tmap;
3892 
3893     /* create map between primal indices (in local representative ordering) and local primal numbering */
3894     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3895     /* allocate space for temporary storage */
3896     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3897     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3898     /* allocate for IS array */
3899     nisdofs = pcbddc->n_ISForDofsLocal;
3900     nisneu = !!pcbddc->NeumannBoundariesLocal;
3901     nis = nisdofs + nisneu;
3902     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3903     /* dofs splitting */
3904     for (i=0;i<nisdofs;i++) {
3905       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3906       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3907       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3908       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3909       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3910       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3911       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3912       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3913     }
3914     /* neumann boundaries */
3915     if (pcbddc->NeumannBoundariesLocal) {
3916       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3917       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3918       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3919       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3920       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3921       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3922       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3923       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3924     }
3925     /* free memory */
3926     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3927     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3928     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3929   } else {
3930     nis = 0;
3931     nisdofs = 0;
3932     nisneu = 0;
3933     isarray = NULL;
3934   }
3935   /* destroy no longer needed map */
3936   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3937 
3938   /* restrict on coarse candidates (if needed) */
3939   coarse_mat_is = NULL;
3940   if (csin) {
3941     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3942       if (redist) {
3943         PetscMPIInt rank;
3944         PetscInt    spc,n_spc_p1,dest[1],destsize;
3945 
3946         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3947         spc = active_procs/ncoarse;
3948         n_spc_p1 = active_procs%ncoarse;
3949         if (im_active) {
3950           destsize = 1;
3951           if (rank > n_spc_p1*(spc+1)-1) {
3952             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3953           } else {
3954             dest[0] = rank/(spc+1);
3955           }
3956         } else {
3957           destsize = 0;
3958         }
3959         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3960       } else if (csin_type_simple) {
3961         PetscMPIInt rank;
3962         PetscInt    issize,isidx;
3963 
3964         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3965         if (im_active) {
3966           issize = 1;
3967           isidx = (PetscInt)rank;
3968         } else {
3969           issize = 0;
3970           isidx = -1;
3971         }
3972         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3973       } else { /* get a suitable subassembling pattern from MATIS code */
3974         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3975       }
3976 
3977       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
3978       if (!redist || ncoarse <= void_procs) {
3979         PetscInt ncoarse_cand,tissize,*nisindices;
3980         PetscInt *coarse_candidates;
3981         const PetscInt* tisindices;
3982 
3983         /* get coarse candidates' ranks in pc communicator */
3984         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3985         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3986         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
3987           if (!coarse_candidates[i]) {
3988             coarse_candidates[ncoarse_cand++]=i;
3989           }
3990         }
3991         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
3992 
3993 
3994         if (pcbddc->dbg_flag) {
3995           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3996           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3997           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3998           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3999           for (i=0;i<ncoarse_cand;i++) {
4000             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4001           }
4002           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4003           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4004         }
4005         /* shift the pattern on coarse candidates */
4006         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4007         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4008         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4009         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4010         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4011         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4012         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4013       }
4014       if (pcbddc->dbg_flag) {
4015         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4016         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4017         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4018         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4019       }
4020     }
4021     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4022     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4023   } else {
4024     if (pcbddc->dbg_flag) {
4025       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4026       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4027       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4028     }
4029     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4030     coarse_mat_is = t_coarse_mat_is;
4031   }
4032 
4033   /* create local to global scatters for coarse problem */
4034   if (compute_vecs) {
4035     PetscInt lrows;
4036     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4037     if (coarse_mat_is) {
4038       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4039     } else {
4040       lrows = 0;
4041     }
4042     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4043     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4044     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4045     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4046     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4047   }
4048   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4049   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4050 
4051   /* set defaults for coarse KSP and PC */
4052   if (multilevel_allowed) {
4053     coarse_ksp_type = KSPRICHARDSON;
4054     coarse_pc_type = PCBDDC;
4055   } else {
4056     coarse_ksp_type = KSPPREONLY;
4057     coarse_pc_type = PCREDUNDANT;
4058   }
4059 
4060   /* print some info if requested */
4061   if (pcbddc->dbg_flag) {
4062     if (!multilevel_allowed) {
4063       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4064       if (multilevel_requested) {
4065         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4066       } else if (pcbddc->max_levels) {
4067         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4068       }
4069       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4070     }
4071   }
4072 
4073   /* create the coarse KSP object only once with defaults */
4074   if (coarse_mat_is) {
4075     MatReuse coarse_mat_reuse;
4076     PetscViewer dbg_viewer = NULL;
4077     if (pcbddc->dbg_flag) {
4078       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4079       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4080     }
4081     if (!pcbddc->coarse_ksp) {
4082       char prefix[256],str_level[16];
4083       size_t len;
4084       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4085       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4086       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4087       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4088       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4089       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4090       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4091       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4092       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4093       /* prefix */
4094       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4095       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4096       if (!pcbddc->current_level) {
4097         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4098         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4099       } else {
4100         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4101         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4102         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4103         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4104         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4105         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4106       }
4107       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4108       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4109       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4110       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4111       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4112       /* allow user customization */
4113       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4114     }
4115     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4116     if (nisdofs) {
4117       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4118       for (i=0;i<nisdofs;i++) {
4119         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4120       }
4121     }
4122     if (nisneu) {
4123       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4124       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4125     }
4126 
4127     /* get some info after set from options */
4128     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4129     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4130     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4131     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4132     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4133       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4134       isbddc = PETSC_FALSE;
4135     }
4136     if (isredundant) {
4137       KSP inner_ksp;
4138       PC inner_pc;
4139       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4140       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4141       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4142     }
4143 
4144     /* assemble coarse matrix */
4145     if (coarse_reuse) {
4146       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4147       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4148       coarse_mat_reuse = MAT_REUSE_MATRIX;
4149     } else {
4150       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4151     }
4152     if (isbddc || isnn) {
4153       if (pcbddc->coarsening_ratio > 1) {
4154         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4155           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4156           if (pcbddc->dbg_flag) {
4157             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4158             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4159             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4160             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4161           }
4162         }
4163         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4164       } else {
4165         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4166         coarse_mat = coarse_mat_is;
4167       }
4168     } else {
4169       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4170     }
4171     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4172 
4173     /* propagate symmetry info to coarse matrix */
4174     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4175     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4176 
4177     /* set operators */
4178     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4179     if (pcbddc->dbg_flag) {
4180       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4181     }
4182   } else { /* processes non partecipating to coarse solver (if any) */
4183     coarse_mat = 0;
4184   }
4185   ierr = PetscFree(isarray);CHKERRQ(ierr);
4186 #if 0
4187   {
4188     PetscViewer viewer;
4189     char filename[256];
4190     sprintf(filename,"coarse_mat.m");
4191     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4192     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4193     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4194     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4195   }
4196 #endif
4197 
4198   /* Compute coarse null space (special handling by BDDC only) */
4199   if (pcbddc->NullSpace) {
4200     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4201   }
4202 
4203   if (pcbddc->coarse_ksp) {
4204     Vec crhs,csol;
4205     PetscBool ispreonly;
4206     if (CoarseNullSpace) {
4207       if (isbddc) {
4208         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4209       } else {
4210         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4211       }
4212     }
4213     /* setup coarse ksp */
4214     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4215     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4216     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4217     /* hack */
4218     if (!csol) {
4219       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4220     }
4221     if (!crhs) {
4222       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4223     }
4224     /* Check coarse problem if in debug mode or if solving with an iterative method */
4225     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4226     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4227       KSP       check_ksp;
4228       KSPType   check_ksp_type;
4229       PC        check_pc;
4230       Vec       check_vec,coarse_vec;
4231       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4232       PetscInt  its;
4233       PetscBool compute_eigs;
4234       PetscReal *eigs_r,*eigs_c;
4235       PetscInt  neigs;
4236       const char *prefix;
4237 
4238       /* Create ksp object suitable for estimation of extreme eigenvalues */
4239       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4240       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4241       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4242       if (ispreonly) {
4243         check_ksp_type = KSPPREONLY;
4244         compute_eigs = PETSC_FALSE;
4245       } else {
4246         check_ksp_type = KSPGMRES;
4247         compute_eigs = PETSC_TRUE;
4248       }
4249       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4250       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4251       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4252       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4253       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4254       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4255       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4256       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4257       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4258       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4259       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4260       /* create random vec */
4261       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4262       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4263       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4264       if (CoarseNullSpace) {
4265         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4266       }
4267       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4268       /* solve coarse problem */
4269       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4270       if (CoarseNullSpace) {
4271         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4272       }
4273       /* set eigenvalue estimation if preonly has not been requested */
4274       if (compute_eigs) {
4275         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4276         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4277         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4278         lambda_max = eigs_r[neigs-1];
4279         lambda_min = eigs_r[0];
4280         if (pcbddc->use_coarse_estimates) {
4281           if (lambda_max>lambda_min) {
4282             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4283             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4284           }
4285         }
4286       }
4287 
4288       /* check coarse problem residual error */
4289       if (pcbddc->dbg_flag) {
4290         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4291         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4292         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4293         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4294         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4295         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4296         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4297         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4298         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4299         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4300         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4301         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4302         if (compute_eigs) {
4303           PetscReal lambda_max_s,lambda_min_s;
4304           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4305           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4306           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4307           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4308           for (i=0;i<neigs;i++) {
4309             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4310           }
4311         }
4312         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4313         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4314       }
4315       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4316       if (compute_eigs) {
4317         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4318         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4319       }
4320     }
4321   }
4322   /* print additional info */
4323   if (pcbddc->dbg_flag) {
4324     /* waits until all processes reaches this point */
4325     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4328   }
4329 
4330   /* free memory */
4331   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4332   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4333   PetscFunctionReturn(0);
4334 }
4335 
4336 #undef __FUNCT__
4337 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4338 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4339 {
4340   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4341   PC_IS*         pcis = (PC_IS*)pc->data;
4342   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4343   PetscInt       i,coarse_size=0;
4344   PetscInt       *local_primal_indices=NULL;
4345   PetscErrorCode ierr;
4346 
4347   PetscFunctionBegin;
4348   /* Compute global number of coarse dofs */
4349   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4350     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4351   }
4352   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4353 
4354   /* check numbering */
4355   if (pcbddc->dbg_flag) {
4356     PetscScalar coarsesum,*array;
4357     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4358 
4359     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4360     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4361     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4362     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4363     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4364     for (i=0;i<pcbddc->local_primal_size;i++) {
4365       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4366     }
4367     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4368     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4369     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4370     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4371     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4372     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4373     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4374     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4375     for (i=0;i<pcis->n;i++) {
4376       if (array[i] == 1.0) {
4377         set_error = PETSC_TRUE;
4378         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4379       }
4380     }
4381     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4382     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4383     for (i=0;i<pcis->n;i++) {
4384       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4385     }
4386     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4387     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4388     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4389     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4390     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4391     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4392     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4393       PetscInt *gidxs;
4394 
4395       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4396       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4397       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4398       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4399       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4400       for (i=0;i<pcbddc->local_primal_size;i++) {
4401         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4402       }
4403       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4404       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4405     }
4406     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4407     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4408   }
4409   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4410   /* get back data */
4411   *coarse_size_n = coarse_size;
4412   *local_primal_indices_n = local_primal_indices;
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "PCBDDCGlobalToLocal"
4418 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4419 {
4420   IS             localis_t;
4421   PetscInt       i,lsize,*idxs,n;
4422   PetscScalar    *vals;
4423   PetscErrorCode ierr;
4424 
4425   PetscFunctionBegin;
4426   /* get indices in local ordering exploiting local to global map */
4427   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4428   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4429   for (i=0;i<lsize;i++) vals[i] = 1.0;
4430   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4431   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4432   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4433   if (idxs) { /* multilevel guard */
4434     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4435   }
4436   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4437   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4438   ierr = PetscFree(vals);CHKERRQ(ierr);
4439   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4440   /* now compute set in local ordering */
4441   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4442   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4443   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4444   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4445   for (i=0,lsize=0;i<n;i++) {
4446     if (PetscRealPart(vals[i]) > 0.5) {
4447       lsize++;
4448     }
4449   }
4450   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4451   for (i=0,lsize=0;i<n;i++) {
4452     if (PetscRealPart(vals[i]) > 0.5) {
4453       idxs[lsize++] = i;
4454     }
4455   }
4456   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4457   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4458   *localis = localis_t;
4459   PetscFunctionReturn(0);
4460 }
4461 
4462 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4463 #undef __FUNCT__
4464 #define __FUNCT__ "PCBDDCMatMult_Private"
4465 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4466 {
4467   PCBDDCChange_ctx change_ctx;
4468   PetscErrorCode   ierr;
4469 
4470   PetscFunctionBegin;
4471   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4472   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4473   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4474   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4475   PetscFunctionReturn(0);
4476 }
4477 
4478 #undef __FUNCT__
4479 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4480 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4481 {
4482   PCBDDCChange_ctx change_ctx;
4483   PetscErrorCode   ierr;
4484 
4485   PetscFunctionBegin;
4486   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4487   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4488   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4489   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4495 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4496 {
4497   PC_IS               *pcis=(PC_IS*)pc->data;
4498   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4499   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4500   Mat                 S_j;
4501   PetscInt            *used_xadj,*used_adjncy;
4502   PetscBool           free_used_adj;
4503   PetscErrorCode      ierr;
4504 
4505   PetscFunctionBegin;
4506   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4507   free_used_adj = PETSC_FALSE;
4508   if (pcbddc->sub_schurs_layers == -1) {
4509     used_xadj = NULL;
4510     used_adjncy = NULL;
4511   } else {
4512     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4513       used_xadj = pcbddc->mat_graph->xadj;
4514       used_adjncy = pcbddc->mat_graph->adjncy;
4515     } else if (pcbddc->computed_rowadj) {
4516       used_xadj = pcbddc->mat_graph->xadj;
4517       used_adjncy = pcbddc->mat_graph->adjncy;
4518     } else {
4519       PetscBool      flg_row=PETSC_FALSE;
4520       const PetscInt *xadj,*adjncy;
4521       PetscInt       nvtxs;
4522 
4523       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4524       if (flg_row) {
4525         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4526         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4527         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4528         free_used_adj = PETSC_TRUE;
4529       } else {
4530         pcbddc->sub_schurs_layers = -1;
4531         used_xadj = NULL;
4532         used_adjncy = NULL;
4533       }
4534       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4535     }
4536   }
4537 
4538   /* setup sub_schurs data */
4539   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4540   if (!sub_schurs->use_mumps) {
4541     /* pcbddc->ksp_D up to date only if not using MUMPS */
4542     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4543     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4544   } else {
4545     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4546   }
4547   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4548 
4549   /* free adjacency */
4550   if (free_used_adj) {
4551     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4552   }
4553   PetscFunctionReturn(0);
4554 }
4555 
4556 #undef __FUNCT__
4557 #define __FUNCT__ "PCBDDCInitSubSchurs"
4558 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4559 {
4560   PC_IS               *pcis=(PC_IS*)pc->data;
4561   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4562   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4563   PCBDDCGraph         graph;
4564   PetscErrorCode      ierr;
4565 
4566   PetscFunctionBegin;
4567   /* attach interface graph for determining subsets */
4568   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4569     IS verticesIS;
4570 
4571     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4572     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4573     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4574     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4575     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4576     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4577 /*
4578     if (pcbddc->dbg_flag) {
4579       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4580     }
4581 */
4582   } else {
4583     graph = pcbddc->mat_graph;
4584   }
4585 
4586   /* sub_schurs init */
4587   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4588 
4589   /* free graph struct */
4590   if (pcbddc->sub_schurs_rebuild) {
4591     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4592   }
4593   PetscFunctionReturn(0);
4594 }
4595