xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 088faed80bd8c1bb2fa3aa39c340cd4a0c87e33a)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1774   PetscBool         boolforchange,qr_needed;
1775   PetscBT           touched,change_basis,qr_needed_idx;
1776   /* auxiliary stuff */
1777   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1778   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1779   PetscScalar       *temp_quadrature_constraint_work=NULL;
1780   /* some quantities */
1781   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1782   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1783 
1784   PetscFunctionBegin;
1785   /* Destroy Mat objects computed previously */
1786   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1787   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1788   /* save info on constraints from previous setup (if any) */
1789   olocal_primal_size = pcbddc->local_primal_size;
1790   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1791   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1792   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1793 
1794   /* print some info */
1795   if (pcbddc->dbg_flag) {
1796     IS       vertices;
1797     PetscInt nv,nedges,nfaces;
1798     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1799     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1800     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1806     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1807   }
1808 
1809   if (!pcbddc->adaptive_selection) {
1810     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1811     MatNullSpace nearnullsp;
1812     const Vec    *nearnullvecs;
1813     Vec          *localnearnullsp;
1814     PetscScalar  *array;
1815     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1816     PetscBool    nnsp_has_cnst;
1817     /* LAPACK working arrays for SVD or POD */
1818     PetscBool    skip_lapack;
1819     PetscScalar  *work;
1820     PetscReal    *singular_vals;
1821 #if defined(PETSC_USE_COMPLEX)
1822     PetscReal    *rwork;
1823 #endif
1824 #if defined(PETSC_MISSING_LAPACK_GESVD)
1825     PetscScalar  *temp_basis,*correlation_mat;
1826 #else
1827     PetscBLASInt dummy_int=1;
1828     PetscScalar  dummy_scalar=1.;
1829 #endif
1830 
1831     /* Get index sets for faces, edges and vertices from graph */
1832     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1833     /* free unneeded index sets */
1834     if (!pcbddc->use_vertices) {
1835       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1836     }
1837     if (!pcbddc->use_edges) {
1838       for (i=0;i<n_ISForEdges;i++) {
1839         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1840       }
1841       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1842       n_ISForEdges = 0;
1843     }
1844     if (!pcbddc->use_faces) {
1845       for (i=0;i<n_ISForFaces;i++) {
1846         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1847       }
1848       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1849       n_ISForFaces = 0;
1850     }
1851     /* HACKS (the following two blocks of code) */
1852     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1853       pcbddc->use_change_of_basis = PETSC_TRUE;
1854       if (!ISForEdges) {
1855         pcbddc->use_change_on_faces = PETSC_TRUE;
1856       }
1857     }
1858     if (pcbddc->NullSpace) {
1859       /* use_change_of_basis should be consistent among processors */
1860       PetscBool tbool[2],gbool[2];
1861       tbool[0] = pcbddc->use_change_of_basis;
1862       tbool[1] = pcbddc->use_change_on_faces;
1863       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1864       pcbddc->use_change_of_basis = gbool[0];
1865       pcbddc->use_change_on_faces = gbool[1];
1866     }
1867 
1868     /* check if near null space is attached to global mat */
1869     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1870     if (nearnullsp) {
1871       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1872       /* remove any stored info */
1873       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1874       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1875       /* store information for BDDC solver reuse */
1876       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1877       pcbddc->onearnullspace = nearnullsp;
1878       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1879       for (i=0;i<nnsp_size;i++) {
1880         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1881       }
1882     } else { /* if near null space is not provided BDDC uses constants by default */
1883       nnsp_size = 0;
1884       nnsp_has_cnst = PETSC_TRUE;
1885     }
1886     /* get max number of constraints on a single cc */
1887     max_constraints = nnsp_size;
1888     if (nnsp_has_cnst) max_constraints++;
1889 
1890     /*
1891          Evaluate maximum storage size needed by the procedure
1892          - temp_indices will contain start index of each constraint stored as follows
1893          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1894          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1895          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1896                                                                                                                                                            */
1897     total_counts = n_ISForFaces+n_ISForEdges;
1898     total_counts *= max_constraints;
1899     n_vertices = 0;
1900     if (ISForVertices) {
1901       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1902     }
1903     total_counts += n_vertices;
1904     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1905     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1906     total_counts = 0;
1907     max_size_of_constraint = 0;
1908     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1909       if (i<n_ISForEdges) {
1910         used_IS = &ISForEdges[i];
1911       } else {
1912         used_IS = &ISForFaces[i-n_ISForEdges];
1913       }
1914       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1915       total_counts += j;
1916       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1917     }
1918     total_counts *= max_constraints;
1919     total_counts += n_vertices;
1920     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1921     /* get local part of global near null space vectors */
1922     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1923     for (k=0;k<nnsp_size;k++) {
1924       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1925       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1926       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1927     }
1928 
1929     /* whether or not to skip lapack calls */
1930     skip_lapack = PETSC_TRUE;
1931     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1932 
1933     /* allocate some auxiliary stuff */
1934     if (!skip_lapack || pcbddc->use_qr_single) {
1935       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1936     } else {
1937       gidxs = NULL;
1938       permutation = NULL;
1939       temp_indices_to_constraint_work = NULL;
1940       temp_quadrature_constraint_work = NULL;
1941     }
1942 
1943     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1944     if (!skip_lapack) {
1945       PetscScalar temp_work;
1946 
1947 #if defined(PETSC_MISSING_LAPACK_GESVD)
1948       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1949       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1950       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1951       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1952 #if defined(PETSC_USE_COMPLEX)
1953       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1954 #endif
1955       /* now we evaluate the optimal workspace using query with lwork=-1 */
1956       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1957       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1958       lwork = -1;
1959       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1960 #if !defined(PETSC_USE_COMPLEX)
1961       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1962 #else
1963       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1964 #endif
1965       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1966       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1967 #else /* on missing GESVD */
1968       /* SVD */
1969       PetscInt max_n,min_n;
1970       max_n = max_size_of_constraint;
1971       min_n = max_constraints;
1972       if (max_size_of_constraint < max_constraints) {
1973         min_n = max_size_of_constraint;
1974         max_n = max_constraints;
1975       }
1976       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1977 #if defined(PETSC_USE_COMPLEX)
1978       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1979 #endif
1980       /* now we evaluate the optimal workspace using query with lwork=-1 */
1981       lwork = -1;
1982       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1983       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1984       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1985       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1986 #if !defined(PETSC_USE_COMPLEX)
1987       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1988 #else
1989       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1990 #endif
1991       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1992       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1993 #endif /* on missing GESVD */
1994       /* Allocate optimal workspace */
1995       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1996       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1997     }
1998     /* Now we can loop on constraining sets */
1999     total_counts = 0;
2000     temp_indices[0] = 0;
2001     /* vertices */
2002     if (ISForVertices) {
2003       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2004       if (nnsp_has_cnst) { /* consider all vertices */
2005         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2006         for (i=0;i<n_vertices;i++) {
2007           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2008           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2009           total_counts++;
2010         }
2011       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2012         PetscBool used_vertex;
2013         for (i=0;i<n_vertices;i++) {
2014           used_vertex = PETSC_FALSE;
2015           k = 0;
2016           while (!used_vertex && k<nnsp_size) {
2017             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2018             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2019               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2020               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2021               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2022               total_counts++;
2023               used_vertex = PETSC_TRUE;
2024             }
2025             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2026             k++;
2027           }
2028         }
2029       }
2030       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2031       n_vertices = total_counts;
2032     }
2033 
2034     /* edges and faces */
2035     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2036       if (ncc<n_ISForEdges) {
2037         used_IS = &ISForEdges[ncc];
2038         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2039       } else {
2040         used_IS = &ISForFaces[ncc-n_ISForEdges];
2041         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2042       }
2043       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2044       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2045       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2046       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2047       /* change of basis should not be performed on local periodic nodes */
2048       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2049       if (nnsp_has_cnst) {
2050         PetscScalar quad_value;
2051         temp_constraints++;
2052         if (!pcbddc->use_nnsp_true) {
2053           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2054         } else {
2055           quad_value = 1.0;
2056         }
2057         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2058         for (j=0;j<size_of_constraint;j++) {
2059           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2060         }
2061         /* sort by global ordering if using lapack subroutines (not needed!) */
2062         if (!skip_lapack || pcbddc->use_qr_single) {
2063           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2064           for (j=0;j<size_of_constraint;j++) {
2065             permutation[j]=j;
2066           }
2067           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2068           for (j=0;j<size_of_constraint;j++) {
2069             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2070           }
2071           for (j=0;j<size_of_constraint;j++) {
2072             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2073             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2074           }
2075           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2076           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2077         }
2078         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2079         total_counts++;
2080       }
2081       for (k=0;k<nnsp_size;k++) {
2082         PetscReal real_value;
2083         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2084         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2085         for (j=0;j<size_of_constraint;j++) {
2086           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2087         }
2088         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2089         /* check if array is null on the connected component */
2090         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2091         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2092         if (real_value > 0.0) { /* keep indices and values */
2093           /* sort by global ordering if using lapack subroutines */
2094           if (!skip_lapack || pcbddc->use_qr_single) {
2095             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2096             for (j=0;j<size_of_constraint;j++) {
2097               permutation[j]=j;
2098             }
2099             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2100             for (j=0;j<size_of_constraint;j++) {
2101               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2102               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2103             }
2104             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2105             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2106           }
2107           temp_constraints++;
2108           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2109           total_counts++;
2110         }
2111       }
2112       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2113       valid_constraints = temp_constraints;
2114       if (!pcbddc->use_nnsp_true && temp_constraints) {
2115         if (temp_constraints == 1) { /* just normalize the constraint */
2116           PetscScalar norm;
2117           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2118           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2119           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2120           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2121         } else { /* perform SVD */
2122           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2123 
2124 #if defined(PETSC_MISSING_LAPACK_GESVD)
2125           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2126              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2127              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2128                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2129                 from that computed using LAPACKgesvd
2130              -> This is due to a different computation of eigenvectors in LAPACKheev
2131              -> The quality of the POD-computed basis will be the same */
2132           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2133           /* Store upper triangular part of correlation matrix */
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2135           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2136           for (j=0;j<temp_constraints;j++) {
2137             for (k=0;k<j+1;k++) {
2138               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2139             }
2140           }
2141           /* compute eigenvalues and eigenvectors of correlation matrix */
2142           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2143           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2144 #if !defined(PETSC_USE_COMPLEX)
2145           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2146 #else
2147           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2148 #endif
2149           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2150           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2151           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2152           j = 0;
2153           while (j < temp_constraints && singular_vals[j] < tol) j++;
2154           total_counts = total_counts-j;
2155           valid_constraints = temp_constraints-j;
2156           /* scale and copy POD basis into used quadrature memory */
2157           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2158           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2159           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2160           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2161           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2162           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2163           if (j<temp_constraints) {
2164             PetscInt ii;
2165             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2166             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2167             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2168             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2169             for (k=0;k<temp_constraints-j;k++) {
2170               for (ii=0;ii<size_of_constraint;ii++) {
2171                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2172               }
2173             }
2174           }
2175 #else  /* on missing GESVD */
2176           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2177           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2178           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2179           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2180 #if !defined(PETSC_USE_COMPLEX)
2181           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2182 #else
2183           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2184 #endif
2185           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2186           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2187           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2188           k = temp_constraints;
2189           if (k > size_of_constraint) k = size_of_constraint;
2190           j = 0;
2191           while (j < k && singular_vals[k-j-1] < tol) j++;
2192           valid_constraints = k-j;
2193           total_counts = total_counts-temp_constraints+valid_constraints;
2194 #endif /* on missing GESVD */
2195         }
2196       }
2197       /* setting change_of_basis flag is safe now */
2198       if (boolforchange) {
2199         for (j=0;j<valid_constraints;j++) {
2200           PetscBTSet(change_basis,total_counts-j-1);
2201         }
2202       }
2203     }
2204     /* free workspace */
2205     if (!skip_lapack || pcbddc->use_qr_single) {
2206       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2207     }
2208     if (!skip_lapack) {
2209       ierr = PetscFree(work);CHKERRQ(ierr);
2210 #if defined(PETSC_USE_COMPLEX)
2211       ierr = PetscFree(rwork);CHKERRQ(ierr);
2212 #endif
2213       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2214 #if defined(PETSC_MISSING_LAPACK_GESVD)
2215       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2216       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2217 #endif
2218     }
2219     for (k=0;k<nnsp_size;k++) {
2220       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2221     }
2222     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2223     /* free index sets of faces, edges and vertices */
2224     for (i=0;i<n_ISForFaces;i++) {
2225       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2226     }
2227     if (n_ISForFaces) {
2228       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2229     }
2230     for (i=0;i<n_ISForEdges;i++) {
2231       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2232     }
2233     if (n_ISForEdges) {
2234       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2235     }
2236     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2237   } else {
2238     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2239     PetscInt        cum = 0;
2240 
2241     total_counts = 0;
2242     n_vertices = 0;
2243     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2244       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2245     }
2246     max_constraints = 0;
2247     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2248       total_counts += pcbddc->adaptive_constraints_n[i];
2249       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2250     }
2251     temp_indices = pcbddc->adaptive_constraints_ptrs;
2252     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2253     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2254 
2255 #if 0
2256     printf("Found %d totals\n",total_counts);
2257     for (i=0;i<total_counts;i++) {
2258       printf("const %d, start %d",i,temp_indices[i]);
2259       printf(" end %d:\n",temp_indices[i+1]);
2260       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2261         printf("  idxs %d",temp_indices_to_constraint[j]);
2262         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2263       }
2264     }
2265     for (i=0;i<n_vertices;i++) {
2266       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2267     }
2268     for (i=0;i<sub_schurs->n_subs;i++) {
2269       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2270     }
2271 #endif
2272 
2273     max_size_of_constraint = 0;
2274     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2275     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2276     /* Change of basis */
2277     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2278     if (pcbddc->use_change_of_basis) {
2279       cum = n_vertices;
2280       for (i=0;i<sub_schurs->n_subs;i++) {
2281         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2282           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2283             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2284           }
2285         }
2286         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2287       }
2288     }
2289   }
2290 
2291   /* map temp_indices_to_constraint in boundary numbering */
2292   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2293   if (i != temp_indices[total_counts]) {
2294     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2295   }
2296 
2297   /* set quantities in pcbddc data structure and store previous primal size */
2298   /* n_vertices defines the number of subdomain corners in the primal space */
2299   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2300   pcbddc->local_primal_size = total_counts;
2301   pcbddc->n_vertices = n_vertices;
2302   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2303 
2304   /* Create constraint matrix */
2305   /* The constraint matrix is used to compute the l2g map of primal dofs */
2306   /* so we need to set it up properly either with or without change of basis */
2307   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2308   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2309   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2310   /* array to compute a local numbering of constraints : vertices first then constraints */
2311   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2312   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2313   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2314   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2315   /* auxiliary stuff for basis change */
2316   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2317   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2318 
2319   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2320   total_primal_vertices=0;
2321   for (i=0;i<pcbddc->local_primal_size;i++) {
2322     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2323     if (size_of_constraint == 1) {
2324       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2325       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2326       aux_primal_minloc[total_primal_vertices]=0;
2327       total_primal_vertices++;
2328     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2329       PetscInt min_loc,min_index;
2330       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2331       /* find first untouched local node */
2332       k = 0;
2333       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2334       min_index = global_indices[k];
2335       min_loc = k;
2336       /* search the minimum among global nodes already untouched on the cc */
2337       for (k=1;k<size_of_constraint;k++) {
2338         /* there can be more than one constraint on a single connected component */
2339         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2340           min_index = global_indices[k];
2341           min_loc = k;
2342         }
2343       }
2344       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2345       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2346       aux_primal_minloc[total_primal_vertices]=min_loc;
2347       total_primal_vertices++;
2348     }
2349   }
2350   /* determine if a QR strategy is needed for change of basis */
2351   qr_needed = PETSC_FALSE;
2352   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2353   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2354     if (PetscBTLookup(change_basis,i)) {
2355       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2356         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2357         j = 0;
2358         for (k=0;k<size_of_constraint;k++) {
2359           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2360             j++;
2361           }
2362         }
2363         /* found more than one primal dof on the cc */
2364         if (j > 1) {
2365           PetscBTSet(qr_needed_idx,i);
2366           qr_needed = PETSC_TRUE;
2367         }
2368       } else {
2369         PetscBTSet(qr_needed_idx,i);
2370         qr_needed = PETSC_TRUE;
2371       }
2372     }
2373   }
2374   total_counts = total_primal_vertices;
2375   for (i=0;i<pcbddc->local_primal_size;i++) {
2376     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2377     if (size_of_constraint != 1 && !PetscBTLookup(change_basis,i)) {
2378       PetscInt min_loc,min_index;
2379       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2380       /* find first untouched local node */
2381       k = 0;
2382       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2383       min_index = global_indices[k];
2384       min_loc = k;
2385       /* search the minimum among global nodes already untouched on the cc */
2386       for (k=1;k<size_of_constraint;k++) {
2387         /* there can be more than one constraint on a single connected component */
2388         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2389           min_index = global_indices[k];
2390           min_loc = k;
2391         }
2392       }
2393       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2394       aux_primal_numbering[total_counts]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2395       aux_primal_minloc[total_counts]=min_loc;
2396       total_counts++;
2397     }
2398   }
2399   /* free workspace */
2400   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2401 
2402   /* permute indices in order to have a sorted set of vertices */
2403   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2404 
2405   /* get reference dofs for local primal dofs */
2406   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2407   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,pcbddc->local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2408   pcbddc->n_actual_vertices = total_primal_vertices;
2409 
2410   /* nonzero structure of constraint matrix */
2411   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2412   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2413   j=total_primal_vertices;
2414   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2415     if (!PetscBTLookup(change_basis,i)) {
2416       nnz[j]=temp_indices[i+1]-temp_indices[i];
2417       j++;
2418     }
2419   }
2420   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2421   ierr = PetscFree(nnz);CHKERRQ(ierr);
2422 
2423   /* set values in constraint matrix */
2424   for (i=0;i<total_primal_vertices;i++) {
2425     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2426   }
2427   total_counts = total_primal_vertices;
2428   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2429     if (!PetscBTLookup(change_basis,i)) {
2430       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2431       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2432       total_counts++;
2433     }
2434   }
2435   /* assembling */
2436   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2437   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2438 
2439   /*
2440   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2441   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2442   */
2443   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2444   if (pcbddc->use_change_of_basis) {
2445     /* dual and primal dofs on a single cc */
2446     PetscInt     dual_dofs,primal_dofs;
2447     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2448     PetscInt     primal_counter;
2449     /* working stuff for GEQRF */
2450     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2451     PetscBLASInt lqr_work;
2452     /* working stuff for UNGQR */
2453     PetscScalar  *gqr_work,lgqr_work_t;
2454     PetscBLASInt lgqr_work;
2455     /* working stuff for TRTRS */
2456     PetscScalar  *trs_rhs;
2457     PetscBLASInt Blas_NRHS;
2458     /* pointers for values insertion into change of basis matrix */
2459     PetscInt     *start_rows,*start_cols;
2460     PetscScalar  *start_vals;
2461     /* working stuff for values insertion */
2462     PetscBT      is_primal;
2463     /* matrix sizes */
2464     PetscInt     global_size,local_size;
2465     /* temporary change of basis */
2466     Mat          localChangeOfBasisMatrix;
2467     /* extra space for debugging */
2468     PetscScalar  *dbg_work;
2469 
2470     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2471     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2472     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2473     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2474     /* nonzeros for local mat */
2475     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2476     for (i=0;i<pcis->n;i++) nnz[i]=1;
2477     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2478       if (PetscBTLookup(change_basis,i)) {
2479         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2480         if (PetscBTLookup(qr_needed_idx,i)) {
2481           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2482         } else {
2483           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2484           /* get local primal index on the cc */
2485           j = 0;
2486           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2487           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2488         }
2489       }
2490     }
2491     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2492     ierr = PetscFree(nnz);CHKERRQ(ierr);
2493     /* Set initial identity in the matrix */
2494     for (i=0;i<pcis->n;i++) {
2495       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2496     }
2497 
2498     if (pcbddc->dbg_flag) {
2499       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2500       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2501     }
2502 
2503 
2504     /* Now we loop on the constraints which need a change of basis */
2505     /*
2506        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2507        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2508 
2509        Basic blocks of change of basis matrix T computed by
2510 
2511           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2512 
2513             | 1        0   ...        0         s_1/S |
2514             | 0        1   ...        0         s_2/S |
2515             |              ...                        |
2516             | 0        ...            1     s_{n-1}/S |
2517             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2518 
2519             with S = \sum_{i=1}^n s_i^2
2520             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2521                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2522 
2523           - QR decomposition of constraints otherwise
2524     */
2525     if (qr_needed) {
2526       /* space to store Q */
2527       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2528       /* first we issue queries for optimal work */
2529       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2530       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2531       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2532       lqr_work = -1;
2533       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2534       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2535       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2536       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2537       lgqr_work = -1;
2538       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2539       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2540       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2541       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2542       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2543       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2544       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2545       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2546       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2547       /* array to store scaling factors for reflectors */
2548       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2549       /* array to store rhs and solution of triangular solver */
2550       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2551       /* allocating workspace for check */
2552       if (pcbddc->dbg_flag) {
2553         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2554       }
2555     }
2556     /* array to store whether a node is primal or not */
2557     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2558     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2559     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2560     if (i != total_primal_vertices) {
2561       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2562     }
2563     for (i=0;i<total_primal_vertices;i++) {
2564       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2565     }
2566     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2567 
2568     /* loop on constraints and see whether or not they need a change of basis and compute it */
2569     /* -> using implicit ordering contained in temp_indices data */
2570     total_counts = pcbddc->n_vertices;
2571     primal_counter = total_counts;
2572     while (total_counts<pcbddc->local_primal_size) {
2573       primal_dofs = 1;
2574       if (PetscBTLookup(change_basis,total_counts)) {
2575         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2576         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2577           primal_dofs++;
2578         }
2579         /* get constraint info */
2580         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2581         dual_dofs = size_of_constraint-primal_dofs;
2582 
2583         if (pcbddc->dbg_flag) {
2584           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2585         }
2586 
2587         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2588 
2589           /* copy quadrature constraints for change of basis check */
2590           if (pcbddc->dbg_flag) {
2591             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2592           }
2593           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2594           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2595 
2596           /* compute QR decomposition of constraints */
2597           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2598           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2600           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2601           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2602           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2603           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2604 
2605           /* explictly compute R^-T */
2606           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2607           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2608           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2609           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2610           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2611           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2612           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2613           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2614           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2615           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2616 
2617           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2618           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2619           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2620           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2621           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2622           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2623           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2624           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2625           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2626 
2627           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2628              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2629              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2630           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2631           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2632           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2633           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2634           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2635           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2636           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2637           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2638           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2639           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2640 
2641           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2642           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2643           /* insert cols for primal dofs */
2644           for (j=0;j<primal_dofs;j++) {
2645             start_vals = &qr_basis[j*size_of_constraint];
2646             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2647             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2648           }
2649           /* insert cols for dual dofs */
2650           for (j=0,k=0;j<dual_dofs;k++) {
2651             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2652               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2653               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2654               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2655               j++;
2656             }
2657           }
2658 
2659           /* check change of basis */
2660           if (pcbddc->dbg_flag) {
2661             PetscInt   ii,jj;
2662             PetscBool valid_qr=PETSC_TRUE;
2663             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2664             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2665             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2666             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2667             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2668             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2669             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2670             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2671             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2672             for (jj=0;jj<size_of_constraint;jj++) {
2673               for (ii=0;ii<primal_dofs;ii++) {
2674                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2675                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2676               }
2677             }
2678             if (!valid_qr) {
2679               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2680               for (jj=0;jj<size_of_constraint;jj++) {
2681                 for (ii=0;ii<primal_dofs;ii++) {
2682                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2683                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2684                   }
2685                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2686                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2687                   }
2688                 }
2689               }
2690             } else {
2691               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2692             }
2693           }
2694         } else { /* simple transformation block */
2695           PetscInt    row,col;
2696           PetscScalar val,norm;
2697 
2698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2699           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2700           for (j=0;j<size_of_constraint;j++) {
2701             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2702             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2703             if (!PetscBTLookup(is_primal,row_B)) {
2704               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2705               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2706               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2707             } else {
2708               for (k=0;k<size_of_constraint;k++) {
2709                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2710                 if (row != col) {
2711                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2712                 } else {
2713                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2714                 }
2715                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2716               }
2717             }
2718           }
2719           if (pcbddc->dbg_flag) {
2720             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2721           }
2722         }
2723         /* increment primal counter */
2724         primal_counter += primal_dofs;
2725       } else {
2726         if (pcbddc->dbg_flag) {
2727           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2728         }
2729       }
2730       /* increment constraint counter total_counts */
2731       total_counts += primal_dofs;
2732     }
2733 
2734     /* free workspace */
2735     if (qr_needed) {
2736       if (pcbddc->dbg_flag) {
2737         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2738       }
2739       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2740       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2741       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2742       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2743       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2744     }
2745     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2746     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2748 
2749     /* assembling of global change of variable */
2750     {
2751       Mat      tmat;
2752       PetscInt bs;
2753 
2754       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2755       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2756       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2757       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2758       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2759       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2760       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2761       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2762       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2763       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2764       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2765       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2766       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2767       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2768       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2769       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2770       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2771       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2772     }
2773     /* check */
2774     if (pcbddc->dbg_flag) {
2775       PetscReal error;
2776       Vec       x,x_change;
2777 
2778       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2779       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2780       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2781       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2782       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2783       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2784       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2785       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2786       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2787       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2788       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2789       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2790       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2791       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2792       ierr = VecDestroy(&x);CHKERRQ(ierr);
2793       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2794     }
2795 
2796     /* adapt sub_schurs computed (if any) */
2797     if (pcbddc->use_deluxe_scaling) {
2798       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2799       if (sub_schurs->S_Ej_all) {
2800         Mat S_new,tmat;
2801         IS is_all_N;
2802 
2803         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2804         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2805         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2806         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2807         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2808         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2809         sub_schurs->S_Ej_all = S_new;
2810         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2811         if (sub_schurs->sum_S_Ej_all) {
2812           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2813           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2814           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2815           sub_schurs->sum_S_Ej_all = S_new;
2816           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2817         }
2818         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2819       }
2820     }
2821     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2822   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2823     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2824     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2825   }
2826 
2827   /* set up change of basis context */
2828   if (pcbddc->ChangeOfBasisMatrix) {
2829     PCBDDCChange_ctx change_ctx;
2830 
2831     if (!pcbddc->new_global_mat) {
2832       PetscInt global_size,local_size;
2833 
2834       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2835       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2836       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2837       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2838       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2839       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2840       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2841       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2842       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2843     } else {
2844       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2845       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2846       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2847     }
2848     if (!pcbddc->user_ChangeOfBasisMatrix) {
2849       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2850       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2851     } else {
2852       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2853       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2854     }
2855     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2856     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2857     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2858     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2859   }
2860 
2861   /* get indices in local ordering for vertices and constraints */
2862   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2863 
2864   /* check if a new primal space has been introduced */
2865   pcbddc->new_primal_space_local = PETSC_TRUE;
2866   if (olocal_primal_size == pcbddc->local_primal_size) {
2867     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2868     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2869   }
2870   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2871   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2872   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2873 
2874   /* flush dbg viewer */
2875   if (pcbddc->dbg_flag) {
2876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2877   }
2878 
2879   /* free workspace */
2880   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2881   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2882   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2883   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2884   if (!pcbddc->adaptive_selection) {
2885     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2886     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2887   } else {
2888     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2889                       pcbddc->adaptive_constraints_ptrs,
2890                       pcbddc->adaptive_constraints_idxs,
2891                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2892     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2893   }
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 #undef __FUNCT__
2898 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2899 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2900 {
2901   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2902   PC_IS       *pcis = (PC_IS*)pc->data;
2903   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2904   PetscInt    ierr,i,vertex_size,N;
2905   PetscViewer viewer=pcbddc->dbg_viewer;
2906 
2907   PetscFunctionBegin;
2908   /* Reset previously computed graph */
2909   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2910   /* Init local Graph struct */
2911   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2912   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2913 
2914   /* Check validity of the csr graph passed in by the user */
2915   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2916     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2917   }
2918 
2919   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2920   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2921     PetscInt  *xadj,*adjncy;
2922     PetscInt  nvtxs;
2923     PetscBool flg_row=PETSC_FALSE;
2924 
2925     if (pcbddc->use_local_adj) {
2926 
2927       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2928       if (flg_row) {
2929         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2930         pcbddc->computed_rowadj = PETSC_TRUE;
2931       }
2932       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2933     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2934       IS                     is_dummy;
2935       ISLocalToGlobalMapping l2gmap_dummy;
2936       PetscInt               j,sum;
2937       PetscInt               *cxadj,*cadjncy;
2938       const PetscInt         *idxs;
2939       PCBDDCGraph            graph;
2940       PetscBT                is_on_boundary;
2941 
2942       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2943       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2944       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2945       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2946       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2947       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2948       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2949       if (flg_row) {
2950         graph->xadj = xadj;
2951         graph->adjncy = adjncy;
2952       }
2953       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2954       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2955       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2956 
2957       if (pcbddc->dbg_flag) {
2958         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2959         for (i=0;i<graph->ncc;i++) {
2960           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2961         }
2962       }
2963 
2964       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2965       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2966       for (i=0;i<pcis->n_B;i++) {
2967         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2968       }
2969       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2970 
2971       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2972       sum = 0;
2973       for (i=0;i<graph->ncc;i++) {
2974         PetscInt sizecc = 0;
2975         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2976           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2977             sizecc++;
2978           }
2979         }
2980         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2981           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2982             cxadj[graph->queue[j]] = sizecc;
2983           }
2984         }
2985         sum += sizecc*sizecc;
2986       }
2987       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2988       sum = 0;
2989       for (i=0;i<pcis->n;i++) {
2990         PetscInt temp = cxadj[i];
2991         cxadj[i] = sum;
2992         sum += temp;
2993       }
2994       cxadj[pcis->n] = sum;
2995       for (i=0;i<graph->ncc;i++) {
2996         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2997           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2998             PetscInt k,sizecc = 0;
2999             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3000               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3001                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3002                 sizecc++;
3003               }
3004             }
3005           }
3006         }
3007       }
3008       if (sum) {
3009         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3010       } else {
3011         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3012         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3013       }
3014       graph->xadj = 0;
3015       graph->adjncy = 0;
3016       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3017       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3018     }
3019   }
3020   if (pcbddc->dbg_flag) {
3021     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3022   }
3023 
3024   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3025   vertex_size = 1;
3026   if (pcbddc->user_provided_isfordofs) {
3027     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3028       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3029       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3030         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3031         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3032       }
3033       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3034       pcbddc->n_ISForDofs = 0;
3035       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3036     }
3037     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3038     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3039   } else {
3040     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3041       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3042       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3043       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3044         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3045       }
3046     }
3047   }
3048 
3049   /* Setup of Graph */
3050   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3051     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3052   }
3053   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3054     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3055   }
3056   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3057 
3058   /* Graph's connected components analysis */
3059   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3060 
3061   /* print some info to stdout */
3062   if (pcbddc->dbg_flag) {
3063     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3064   }
3065 
3066   /* mark topography has done */
3067   pcbddc->recompute_topography = PETSC_FALSE;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "PCBDDCSubsetNumbering"
3073 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3074 {
3075   Vec            local_vec,global_vec;
3076   IS             seqis,paris;
3077   VecScatter     scatter_ctx;
3078   PetscScalar    *array;
3079   PetscInt       *temp_global_dofs;
3080   PetscScalar    globalsum;
3081   PetscInt       i,j,s;
3082   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3083   PetscMPIInt    rank_prec_comm,size_prec_comm;
3084   PetscInt       *dof_sizes,*dof_displs;
3085   PetscBool      first_found;
3086   PetscErrorCode ierr;
3087 
3088   PetscFunctionBegin;
3089   /* mpi buffers */
3090   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3091   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3092   j = ( !rank_prec_comm ? size_prec_comm : 0);
3093   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3094   /* get maximum size of subset */
3095   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3096   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3097   max_local = 0;
3098   for (i=0;i<n_local_dofs;i++) {
3099     if (max_local < temp_global_dofs[i] ) {
3100       max_local = temp_global_dofs[i];
3101     }
3102   }
3103   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3104   max_global++;
3105   max_local = 0;
3106   for (i=0;i<n_local_dofs;i++) {
3107     if (max_local < local_dofs[i] ) {
3108       max_local = local_dofs[i];
3109     }
3110   }
3111   max_local++;
3112   /* allocate workspace */
3113   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3114   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3115   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3116   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3117   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3118   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3119   /* create scatter */
3120   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3121   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3122   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3123   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3124   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3125   /* init array */
3126   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3127   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3128   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3129   if (local_dofs_mult) {
3130     for (i=0;i<n_local_dofs;i++) {
3131       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3132     }
3133   } else {
3134     for (i=0;i<n_local_dofs;i++) {
3135       array[local_dofs[i]]=1.0;
3136     }
3137   }
3138   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3139   /* scatter into global vec and get total number of global dofs */
3140   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3141   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3142   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3143   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3144   /* Fill global_vec with cumulative function for global numbering */
3145   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3146   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3147   nlocals = 0;
3148   first_index = -1;
3149   first_found = PETSC_FALSE;
3150   for (i=0;i<s;i++) {
3151     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3152       first_found = PETSC_TRUE;
3153       first_index = i;
3154     }
3155     nlocals += (PetscInt)PetscRealPart(array[i]);
3156   }
3157   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3158   if (!rank_prec_comm) {
3159     dof_displs[0]=0;
3160     for (i=1;i<size_prec_comm;i++) {
3161       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3162     }
3163   }
3164   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3165   if (first_found) {
3166     array[first_index] += (PetscScalar)nlocals;
3167     old_index = first_index;
3168     for (i=first_index+1;i<s;i++) {
3169       if (PetscRealPart(array[i]) > 0.1) {
3170         array[i] += array[old_index];
3171         old_index = i;
3172       }
3173     }
3174   }
3175   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3176   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3177   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3178   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3179   /* get global ordering of local dofs */
3180   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3181   if (local_dofs_mult) {
3182     for (i=0;i<n_local_dofs;i++) {
3183       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3184     }
3185   } else {
3186     for (i=0;i<n_local_dofs;i++) {
3187       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3188     }
3189   }
3190   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3191   /* free workspace */
3192   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3193   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3194   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3195   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3196   /* return pointer to global ordering of local dofs */
3197   *global_numbering_subset = temp_global_dofs;
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3203 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3204 {
3205   PetscInt       i,j;
3206   PetscScalar    *alphas;
3207   PetscErrorCode ierr;
3208 
3209   PetscFunctionBegin;
3210   /* this implements stabilized Gram-Schmidt */
3211   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3212   for (i=0;i<n;i++) {
3213     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3214     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3215     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3216   }
3217   ierr = PetscFree(alphas);CHKERRQ(ierr);
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatISGetSubassemblingPattern"
3223 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3224 {
3225   Mat             subdomain_adj;
3226   IS              new_ranks,ranks_send_to;
3227   MatPartitioning partitioner;
3228   Mat_IS          *matis;
3229   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3230   PetscInt        prank;
3231   PetscMPIInt     size,rank,color;
3232   PetscInt        *xadj,*adjncy,*oldranks;
3233   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3234   PetscInt        i,local_size,threshold=0;
3235   PetscErrorCode  ierr;
3236   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3237   PetscSubcomm    subcomm;
3238 
3239   PetscFunctionBegin;
3240   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3241   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3242   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3243 
3244   /* Get info on mapping */
3245   matis = (Mat_IS*)(mat->data);
3246   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3247   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3248 
3249   /* build local CSR graph of subdomains' connectivity */
3250   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3251   xadj[0] = 0;
3252   xadj[1] = PetscMax(n_neighs-1,0);
3253   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3254   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3255 
3256   if (threshold) {
3257     PetscInt xadj_count = 0;
3258     for (i=1;i<n_neighs;i++) {
3259       if (n_shared[i] > threshold) {
3260         adjncy[xadj_count] = neighs[i];
3261         adjncy_wgt[xadj_count] = n_shared[i];
3262         xadj_count++;
3263       }
3264     }
3265     xadj[1] = xadj_count;
3266   } else {
3267     if (xadj[1]) {
3268       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3269       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3270     }
3271   }
3272   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3273   if (use_square) {
3274     for (i=0;i<xadj[1];i++) {
3275       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3276     }
3277   }
3278   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3279 
3280   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3281 
3282   /*
3283     Restrict work on active processes only.
3284   */
3285   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3286   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3287   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3288   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3289   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3290   if (color) {
3291     ierr = PetscFree(xadj);CHKERRQ(ierr);
3292     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3293     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3294   } else {
3295     PetscInt coarsening_ratio;
3296     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3297     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3298     prank = rank;
3299     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3300     /*
3301     for (i=0;i<size;i++) {
3302       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3303     }
3304     */
3305     for (i=0;i<xadj[1];i++) {
3306       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3307     }
3308     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3309     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3310     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3311 
3312     /* Partition */
3313     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3314     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3315     if (use_vwgt) {
3316       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3317       v_wgt[0] = local_size;
3318       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3319     }
3320     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3321     coarsening_ratio = size/n_subdomains;
3322     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3323     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3324     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3325     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3326 
3327     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3328     if (contiguous) {
3329       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3330     } else {
3331       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3332     }
3333     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3334     /* clean up */
3335     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3336     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3337     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3338     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3339   }
3340   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3341 
3342   /* assemble parallel IS for sends */
3343   i = 1;
3344   if (color) i=0;
3345   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3346 
3347   /* get back IS */
3348   *is_sends = ranks_send_to;
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatISSubassemble"
3356 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3357 {
3358   Mat                    local_mat;
3359   Mat_IS                 *matis;
3360   IS                     is_sends_internal;
3361   PetscInt               rows,cols,new_local_rows;
3362   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3363   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3364   ISLocalToGlobalMapping l2gmap;
3365   PetscInt*              l2gmap_indices;
3366   const PetscInt*        is_indices;
3367   MatType                new_local_type;
3368   /* buffers */
3369   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3370   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3371   PetscInt               *recv_buffer_idxs_local;
3372   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3373   /* MPI */
3374   MPI_Comm               comm,comm_n;
3375   PetscSubcomm           subcomm;
3376   PetscMPIInt            n_sends,n_recvs,commsize;
3377   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3378   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3379   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3380   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3381   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3382   PetscErrorCode         ierr;
3383 
3384   PetscFunctionBegin;
3385   /* TODO: add missing checks */
3386   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3387   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3388   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3389   PetscValidLogicalCollectiveInt(mat,nis,7);
3390   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3391   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3392   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3393   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3394   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3395   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3396   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3397   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3398     PetscInt mrows,mcols,mnrows,mncols;
3399     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3400     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3401     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3402     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3403     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3404     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3405   }
3406   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3407   PetscValidLogicalCollectiveInt(mat,bs,0);
3408   /* prepare IS for sending if not provided */
3409   if (!is_sends) {
3410     PetscBool pcontig = PETSC_TRUE;
3411     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3412     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3413   } else {
3414     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3415     is_sends_internal = is_sends;
3416   }
3417 
3418   /* get pointer of MATIS data */
3419   matis = (Mat_IS*)mat->data;
3420 
3421   /* get comm */
3422   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3423 
3424   /* compute number of sends */
3425   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3426   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3427 
3428   /* compute number of receives */
3429   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3430   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3431   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3432   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3433   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3434   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3435   ierr = PetscFree(iflags);CHKERRQ(ierr);
3436 
3437   /* restrict comm if requested */
3438   subcomm = 0;
3439   destroy_mat = PETSC_FALSE;
3440   if (restrict_comm) {
3441     PetscMPIInt color,subcommsize;
3442 
3443     color = 0;
3444     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3445     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3446     subcommsize = commsize - subcommsize;
3447     /* check if reuse has been requested */
3448     if (reuse == MAT_REUSE_MATRIX) {
3449       if (*mat_n) {
3450         PetscMPIInt subcommsize2;
3451         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3452         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3453         comm_n = PetscObjectComm((PetscObject)*mat_n);
3454       } else {
3455         comm_n = PETSC_COMM_SELF;
3456       }
3457     } else { /* MAT_INITIAL_MATRIX */
3458       PetscMPIInt rank;
3459 
3460       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3461       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3462       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3463       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3464       comm_n = PetscSubcommChild(subcomm);
3465     }
3466     /* flag to destroy *mat_n if not significative */
3467     if (color) destroy_mat = PETSC_TRUE;
3468   } else {
3469     comm_n = comm;
3470   }
3471 
3472   /* prepare send/receive buffers */
3473   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3474   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3475   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3476   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3477   if (nis) {
3478     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3479   }
3480 
3481   /* Get data from local matrices */
3482   if (!isdense) {
3483     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3484     /* TODO: See below some guidelines on how to prepare the local buffers */
3485     /*
3486        send_buffer_vals should contain the raw values of the local matrix
3487        send_buffer_idxs should contain:
3488        - MatType_PRIVATE type
3489        - PetscInt        size_of_l2gmap
3490        - PetscInt        global_row_indices[size_of_l2gmap]
3491        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3492     */
3493   } else {
3494     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3495     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3496     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3497     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3498     send_buffer_idxs[1] = i;
3499     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3500     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3501     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3502     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3503     for (i=0;i<n_sends;i++) {
3504       ilengths_vals[is_indices[i]] = len*len;
3505       ilengths_idxs[is_indices[i]] = len+2;
3506     }
3507   }
3508   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3509   /* additional is (if any) */
3510   if (nis) {
3511     PetscMPIInt psum;
3512     PetscInt j;
3513     for (j=0,psum=0;j<nis;j++) {
3514       PetscInt plen;
3515       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3516       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3517       psum += len+1; /* indices + lenght */
3518     }
3519     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3520     for (j=0,psum=0;j<nis;j++) {
3521       PetscInt plen;
3522       const PetscInt *is_array_idxs;
3523       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3524       send_buffer_idxs_is[psum] = plen;
3525       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3526       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3527       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3528       psum += plen+1; /* indices + lenght */
3529     }
3530     for (i=0;i<n_sends;i++) {
3531       ilengths_idxs_is[is_indices[i]] = psum;
3532     }
3533     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3534   }
3535 
3536   buf_size_idxs = 0;
3537   buf_size_vals = 0;
3538   buf_size_idxs_is = 0;
3539   for (i=0;i<n_recvs;i++) {
3540     buf_size_idxs += (PetscInt)olengths_idxs[i];
3541     buf_size_vals += (PetscInt)olengths_vals[i];
3542     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3543   }
3544   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3545   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3546   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3547 
3548   /* get new tags for clean communications */
3549   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3550   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3551   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3552 
3553   /* allocate for requests */
3554   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3555   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3556   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3557   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3558   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3560 
3561   /* communications */
3562   ptr_idxs = recv_buffer_idxs;
3563   ptr_vals = recv_buffer_vals;
3564   ptr_idxs_is = recv_buffer_idxs_is;
3565   for (i=0;i<n_recvs;i++) {
3566     source_dest = onodes[i];
3567     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3568     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3569     ptr_idxs += olengths_idxs[i];
3570     ptr_vals += olengths_vals[i];
3571     if (nis) {
3572       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3573       ptr_idxs_is += olengths_idxs_is[i];
3574     }
3575   }
3576   for (i=0;i<n_sends;i++) {
3577     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3578     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3579     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3580     if (nis) {
3581       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3582     }
3583   }
3584   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3585   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3586 
3587   /* assemble new l2g map */
3588   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3589   ptr_idxs = recv_buffer_idxs;
3590   new_local_rows = 0;
3591   for (i=0;i<n_recvs;i++) {
3592     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3593     ptr_idxs += olengths_idxs[i];
3594   }
3595   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3596   ptr_idxs = recv_buffer_idxs;
3597   new_local_rows = 0;
3598   for (i=0;i<n_recvs;i++) {
3599     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3600     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3601     ptr_idxs += olengths_idxs[i];
3602   }
3603   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3604   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3605   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3606 
3607   /* infer new local matrix type from received local matrices type */
3608   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3609   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3610   if (n_recvs) {
3611     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3612     ptr_idxs = recv_buffer_idxs;
3613     for (i=0;i<n_recvs;i++) {
3614       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3615         new_local_type_private = MATAIJ_PRIVATE;
3616         break;
3617       }
3618       ptr_idxs += olengths_idxs[i];
3619     }
3620     switch (new_local_type_private) {
3621       case MATDENSE_PRIVATE:
3622         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3623           new_local_type = MATSEQAIJ;
3624           bs = 1;
3625         } else { /* if I receive only 1 dense matrix */
3626           new_local_type = MATSEQDENSE;
3627           bs = 1;
3628         }
3629         break;
3630       case MATAIJ_PRIVATE:
3631         new_local_type = MATSEQAIJ;
3632         bs = 1;
3633         break;
3634       case MATBAIJ_PRIVATE:
3635         new_local_type = MATSEQBAIJ;
3636         break;
3637       case MATSBAIJ_PRIVATE:
3638         new_local_type = MATSEQSBAIJ;
3639         break;
3640       default:
3641         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3642         break;
3643     }
3644   } else { /* by default, new_local_type is seqdense */
3645     new_local_type = MATSEQDENSE;
3646     bs = 1;
3647   }
3648 
3649   /* create MATIS object if needed */
3650   if (reuse == MAT_INITIAL_MATRIX) {
3651     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3652     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3653   } else {
3654     /* it also destroys the local matrices */
3655     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3656   }
3657   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3658   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3659 
3660   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3661 
3662   /* Global to local map of received indices */
3663   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3664   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3665   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3666 
3667   /* restore attributes -> type of incoming data and its size */
3668   buf_size_idxs = 0;
3669   for (i=0;i<n_recvs;i++) {
3670     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3671     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3672     buf_size_idxs += (PetscInt)olengths_idxs[i];
3673   }
3674   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3675 
3676   /* set preallocation */
3677   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3678   if (!newisdense) {
3679     PetscInt *new_local_nnz=0;
3680 
3681     ptr_vals = recv_buffer_vals;
3682     ptr_idxs = recv_buffer_idxs_local;
3683     if (n_recvs) {
3684       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3685     }
3686     for (i=0;i<n_recvs;i++) {
3687       PetscInt j;
3688       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3689         for (j=0;j<*(ptr_idxs+1);j++) {
3690           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3691         }
3692       } else {
3693         /* TODO */
3694       }
3695       ptr_idxs += olengths_idxs[i];
3696     }
3697     if (new_local_nnz) {
3698       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3699       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3700       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3701       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3702       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3703       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3704     } else {
3705       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3706     }
3707     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3708   } else {
3709     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3710   }
3711 
3712   /* set values */
3713   ptr_vals = recv_buffer_vals;
3714   ptr_idxs = recv_buffer_idxs_local;
3715   for (i=0;i<n_recvs;i++) {
3716     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3717       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3718       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3719       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3720       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3722     } else {
3723       /* TODO */
3724     }
3725     ptr_idxs += olengths_idxs[i];
3726     ptr_vals += olengths_vals[i];
3727   }
3728   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3729   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3732   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3733   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3734 
3735 #if 0
3736   if (!restrict_comm) { /* check */
3737     Vec       lvec,rvec;
3738     PetscReal infty_error;
3739 
3740     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3741     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3742     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3743     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3744     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3745     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3746     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3747     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3748     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3749   }
3750 #endif
3751 
3752   /* assemble new additional is (if any) */
3753   if (nis) {
3754     PetscInt **temp_idxs,*count_is,j,psum;
3755 
3756     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3757     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3758     ptr_idxs = recv_buffer_idxs_is;
3759     psum = 0;
3760     for (i=0;i<n_recvs;i++) {
3761       for (j=0;j<nis;j++) {
3762         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3763         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3764         psum += plen;
3765         ptr_idxs += plen+1; /* shift pointer to received data */
3766       }
3767     }
3768     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3769     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3770     for (i=1;i<nis;i++) {
3771       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3772     }
3773     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3774     ptr_idxs = recv_buffer_idxs_is;
3775     for (i=0;i<n_recvs;i++) {
3776       for (j=0;j<nis;j++) {
3777         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3778         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3779         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3780         ptr_idxs += plen+1; /* shift pointer to received data */
3781       }
3782     }
3783     for (i=0;i<nis;i++) {
3784       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3785       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3786       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3787     }
3788     ierr = PetscFree(count_is);CHKERRQ(ierr);
3789     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3790     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3791   }
3792   /* free workspace */
3793   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3794   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3795   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3796   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3797   if (isdense) {
3798     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3799     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3800   } else {
3801     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3802   }
3803   if (nis) {
3804     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3805     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3806   }
3807   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3808   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3809   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3810   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3811   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3812   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3813   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3814   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3815   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3816   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3817   ierr = PetscFree(onodes);CHKERRQ(ierr);
3818   if (nis) {
3819     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3820     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3821     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3822   }
3823   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3824   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3825     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3826     for (i=0;i<nis;i++) {
3827       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3828     }
3829   }
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 /* temporary hack into ksp private data structure */
3834 #include <petsc/private/kspimpl.h>
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3838 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3839 {
3840   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3841   PC_IS                  *pcis = (PC_IS*)pc->data;
3842   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3843   MatNullSpace           CoarseNullSpace=NULL;
3844   ISLocalToGlobalMapping coarse_islg;
3845   IS                     coarse_is,*isarray;
3846   PetscInt               i,im_active=-1,active_procs=-1;
3847   PetscInt               nis,nisdofs,nisneu;
3848   PC                     pc_temp;
3849   PCType                 coarse_pc_type;
3850   KSPType                coarse_ksp_type;
3851   PetscBool              multilevel_requested,multilevel_allowed;
3852   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3853   Mat                    t_coarse_mat_is;
3854   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3855   PetscMPIInt            all_procs;
3856   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3857   PetscBool              compute_vecs = PETSC_FALSE;
3858   PetscScalar            *array;
3859   PetscErrorCode         ierr;
3860 
3861   PetscFunctionBegin;
3862   /* Assign global numbering to coarse dofs */
3863   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3864     PetscInt ocoarse_size;
3865     compute_vecs = PETSC_TRUE;
3866     ocoarse_size = pcbddc->coarse_size;
3867     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3868     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3869     /* see if we can avoid some work */
3870     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3871       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3872         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3873         coarse_reuse = PETSC_FALSE;
3874       } else { /* we can safely reuse already computed coarse matrix */
3875         coarse_reuse = PETSC_TRUE;
3876       }
3877     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3878       coarse_reuse = PETSC_FALSE;
3879     }
3880     /* reset any subassembling information */
3881     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3882     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3883   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3884     coarse_reuse = PETSC_TRUE;
3885   }
3886 
3887   /* count "active" (i.e. with positive local size) and "void" processes */
3888   im_active = !!(pcis->n);
3889   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3890   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3891   void_procs = all_procs-active_procs;
3892   csin_type_simple = PETSC_TRUE;
3893   redist = PETSC_FALSE;
3894   if (pcbddc->current_level && void_procs) {
3895     csin_ml = PETSC_TRUE;
3896     ncoarse_ml = void_procs;
3897     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3898     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3899       csin_ds = PETSC_TRUE;
3900       ncoarse_ds = pcbddc->redistribute_coarse;
3901       redist = PETSC_TRUE;
3902     } else {
3903       csin_ds = PETSC_TRUE;
3904       ncoarse_ds = active_procs;
3905       redist = PETSC_TRUE;
3906     }
3907   } else {
3908     csin_ml = PETSC_FALSE;
3909     ncoarse_ml = all_procs;
3910     if (void_procs) {
3911       csin_ds = PETSC_TRUE;
3912       ncoarse_ds = void_procs;
3913       csin_type_simple = PETSC_FALSE;
3914     } else {
3915       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3916         csin_ds = PETSC_TRUE;
3917         ncoarse_ds = pcbddc->redistribute_coarse;
3918         redist = PETSC_TRUE;
3919       } else {
3920         csin_ds = PETSC_FALSE;
3921         ncoarse_ds = all_procs;
3922       }
3923     }
3924   }
3925 
3926   /*
3927     test if we can go multilevel: three conditions must be satisfied:
3928     - we have not exceeded the number of levels requested
3929     - we can actually subassemble the active processes
3930     - we can find a suitable number of MPI processes where we can place the subassembled problem
3931   */
3932   multilevel_allowed = PETSC_FALSE;
3933   multilevel_requested = PETSC_FALSE;
3934   if (pcbddc->current_level < pcbddc->max_levels) {
3935     multilevel_requested = PETSC_TRUE;
3936     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3937       multilevel_allowed = PETSC_FALSE;
3938     } else {
3939       multilevel_allowed = PETSC_TRUE;
3940     }
3941   }
3942   /* determine number of process partecipating to coarse solver */
3943   if (multilevel_allowed) {
3944     ncoarse = ncoarse_ml;
3945     csin = csin_ml;
3946     redist = PETSC_FALSE;
3947   } else {
3948     ncoarse = ncoarse_ds;
3949     csin = csin_ds;
3950   }
3951 
3952   /* creates temporary l2gmap and IS for coarse indexes */
3953   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3954   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3955 
3956   /* creates temporary MATIS object for coarse matrix */
3957   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3958   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3959   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3960   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3961 #if 0
3962   {
3963     PetscViewer viewer;
3964     char filename[256];
3965     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3966     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3967     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3968     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3969     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3970   }
3971 #endif
3972   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3973   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3974   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3977 
3978   /* compute dofs splitting and neumann boundaries for coarse dofs */
3979   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3980     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3981     const PetscInt         *idxs;
3982     ISLocalToGlobalMapping tmap;
3983 
3984     /* create map between primal indices (in local representative ordering) and local primal numbering */
3985     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3986     /* allocate space for temporary storage */
3987     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3988     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3989     /* allocate for IS array */
3990     nisdofs = pcbddc->n_ISForDofsLocal;
3991     nisneu = !!pcbddc->NeumannBoundariesLocal;
3992     nis = nisdofs + nisneu;
3993     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3994     /* dofs splitting */
3995     for (i=0;i<nisdofs;i++) {
3996       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3997       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3998       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3999       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4000       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4001       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4002       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4003       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4004     }
4005     /* neumann boundaries */
4006     if (pcbddc->NeumannBoundariesLocal) {
4007       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4008       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4009       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4010       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4011       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4012       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4013       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4014       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4015     }
4016     /* free memory */
4017     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4018     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4019     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4020   } else {
4021     nis = 0;
4022     nisdofs = 0;
4023     nisneu = 0;
4024     isarray = NULL;
4025   }
4026   /* destroy no longer needed map */
4027   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4028 
4029   /* restrict on coarse candidates (if needed) */
4030   coarse_mat_is = NULL;
4031   if (csin) {
4032     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4033       if (redist) {
4034         PetscMPIInt rank;
4035         PetscInt    spc,n_spc_p1,dest[1],destsize;
4036 
4037         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4038         spc = active_procs/ncoarse;
4039         n_spc_p1 = active_procs%ncoarse;
4040         if (im_active) {
4041           destsize = 1;
4042           if (rank > n_spc_p1*(spc+1)-1) {
4043             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4044           } else {
4045             dest[0] = rank/(spc+1);
4046           }
4047         } else {
4048           destsize = 0;
4049         }
4050         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4051       } else if (csin_type_simple) {
4052         PetscMPIInt rank;
4053         PetscInt    issize,isidx;
4054 
4055         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4056         if (im_active) {
4057           issize = 1;
4058           isidx = (PetscInt)rank;
4059         } else {
4060           issize = 0;
4061           isidx = -1;
4062         }
4063         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4064       } else { /* get a suitable subassembling pattern from MATIS code */
4065         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4066       }
4067 
4068       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4069       if (!redist || ncoarse <= void_procs) {
4070         PetscInt ncoarse_cand,tissize,*nisindices;
4071         PetscInt *coarse_candidates;
4072         const PetscInt* tisindices;
4073 
4074         /* get coarse candidates' ranks in pc communicator */
4075         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4076         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4077         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4078           if (!coarse_candidates[i]) {
4079             coarse_candidates[ncoarse_cand++]=i;
4080           }
4081         }
4082         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4083 
4084 
4085         if (pcbddc->dbg_flag) {
4086           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4087           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4088           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4089           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4090           for (i=0;i<ncoarse_cand;i++) {
4091             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4092           }
4093           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4094           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4095         }
4096         /* shift the pattern on coarse candidates */
4097         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4098         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4099         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4100         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4101         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4102         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4103         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4104       }
4105       if (pcbddc->dbg_flag) {
4106         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4107         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4108         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4109         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4110       }
4111     }
4112     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4113     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4114   } else {
4115     if (pcbddc->dbg_flag) {
4116       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4117       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4118       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4119     }
4120     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4121     coarse_mat_is = t_coarse_mat_is;
4122   }
4123 
4124   /* create local to global scatters for coarse problem */
4125   if (compute_vecs) {
4126     PetscInt lrows;
4127     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4128     if (coarse_mat_is) {
4129       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4130     } else {
4131       lrows = 0;
4132     }
4133     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4134     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4135     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4136     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4137     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4138   }
4139   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4140   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4141 
4142   /* set defaults for coarse KSP and PC */
4143   if (multilevel_allowed) {
4144     coarse_ksp_type = KSPRICHARDSON;
4145     coarse_pc_type = PCBDDC;
4146   } else {
4147     coarse_ksp_type = KSPPREONLY;
4148     coarse_pc_type = PCREDUNDANT;
4149   }
4150 
4151   /* print some info if requested */
4152   if (pcbddc->dbg_flag) {
4153     if (!multilevel_allowed) {
4154       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4155       if (multilevel_requested) {
4156         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4157       } else if (pcbddc->max_levels) {
4158         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4159       }
4160       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4161     }
4162   }
4163 
4164   /* create the coarse KSP object only once with defaults */
4165   if (coarse_mat_is) {
4166     MatReuse coarse_mat_reuse;
4167     PetscViewer dbg_viewer = NULL;
4168     if (pcbddc->dbg_flag) {
4169       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4170       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4171     }
4172     if (!pcbddc->coarse_ksp) {
4173       char prefix[256],str_level[16];
4174       size_t len;
4175       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4176       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4177       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4178       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4179       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4180       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4181       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4182       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4183       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4184       /* prefix */
4185       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4186       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4187       if (!pcbddc->current_level) {
4188         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4189         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4190       } else {
4191         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4192         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4193         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4194         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4195         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4196         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4197       }
4198       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4199       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4200       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4201       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4202       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4203       /* allow user customization */
4204       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4205     }
4206     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4207     if (nisdofs) {
4208       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4209       for (i=0;i<nisdofs;i++) {
4210         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4211       }
4212     }
4213     if (nisneu) {
4214       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4215       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4216     }
4217 
4218     /* get some info after set from options */
4219     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4220     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4221     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4222     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4223     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4224       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4225       isbddc = PETSC_FALSE;
4226     }
4227     if (isredundant) {
4228       KSP inner_ksp;
4229       PC inner_pc;
4230       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4231       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4232       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4233     }
4234 
4235     /* assemble coarse matrix */
4236     if (coarse_reuse) {
4237       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4238       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4239       coarse_mat_reuse = MAT_REUSE_MATRIX;
4240     } else {
4241       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4242     }
4243     if (isbddc || isnn) {
4244       if (pcbddc->coarsening_ratio > 1) {
4245         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4246           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4247           if (pcbddc->dbg_flag) {
4248             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4249             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4250             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4251             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4252           }
4253         }
4254         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4255       } else {
4256         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4257         coarse_mat = coarse_mat_is;
4258       }
4259     } else {
4260       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4261     }
4262     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4263 
4264     /* propagate symmetry info to coarse matrix */
4265     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4266     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4267 
4268     /* set operators */
4269     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4270     if (pcbddc->dbg_flag) {
4271       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4272     }
4273   } else { /* processes non partecipating to coarse solver (if any) */
4274     coarse_mat = 0;
4275   }
4276   ierr = PetscFree(isarray);CHKERRQ(ierr);
4277 #if 0
4278   {
4279     PetscViewer viewer;
4280     char filename[256];
4281     sprintf(filename,"coarse_mat.m");
4282     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4283     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4284     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4285     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4286   }
4287 #endif
4288 
4289   /* Compute coarse null space (special handling by BDDC only) */
4290   if (pcbddc->NullSpace) {
4291     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4292   }
4293 
4294   if (pcbddc->coarse_ksp) {
4295     Vec crhs,csol;
4296     PetscBool ispreonly;
4297     if (CoarseNullSpace) {
4298       if (isbddc) {
4299         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4300       } else {
4301         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4302       }
4303     }
4304     /* setup coarse ksp */
4305     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4306     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4307     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4308     /* hack */
4309     if (!csol) {
4310       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4311     }
4312     if (!crhs) {
4313       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4314     }
4315     /* Check coarse problem if in debug mode or if solving with an iterative method */
4316     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4317     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4318       KSP       check_ksp;
4319       KSPType   check_ksp_type;
4320       PC        check_pc;
4321       Vec       check_vec,coarse_vec;
4322       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4323       PetscInt  its;
4324       PetscBool compute_eigs;
4325       PetscReal *eigs_r,*eigs_c;
4326       PetscInt  neigs;
4327       const char *prefix;
4328 
4329       /* Create ksp object suitable for estimation of extreme eigenvalues */
4330       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4331       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4332       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4333       if (ispreonly) {
4334         check_ksp_type = KSPPREONLY;
4335         compute_eigs = PETSC_FALSE;
4336       } else {
4337         check_ksp_type = KSPGMRES;
4338         compute_eigs = PETSC_TRUE;
4339       }
4340       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4341       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4342       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4343       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4344       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4345       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4346       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4347       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4348       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4349       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4350       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4351       /* create random vec */
4352       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4353       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4354       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4355       if (CoarseNullSpace) {
4356         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4357       }
4358       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4359       /* solve coarse problem */
4360       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4361       if (CoarseNullSpace) {
4362         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4363       }
4364       /* set eigenvalue estimation if preonly has not been requested */
4365       if (compute_eigs) {
4366         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4367         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4368         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4369         lambda_max = eigs_r[neigs-1];
4370         lambda_min = eigs_r[0];
4371         if (pcbddc->use_coarse_estimates) {
4372           if (lambda_max>lambda_min) {
4373             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4374             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4375           }
4376         }
4377       }
4378 
4379       /* check coarse problem residual error */
4380       if (pcbddc->dbg_flag) {
4381         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4382         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4383         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4384         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4385         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4386         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4387         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4388         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4389         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4390         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4391         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4392         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4393         if (compute_eigs) {
4394           PetscReal lambda_max_s,lambda_min_s;
4395           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4396           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4397           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4398           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4399           for (i=0;i<neigs;i++) {
4400             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4401           }
4402         }
4403         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4404         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4405       }
4406       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4407       if (compute_eigs) {
4408         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4409         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4410       }
4411     }
4412   }
4413   /* print additional info */
4414   if (pcbddc->dbg_flag) {
4415     /* waits until all processes reaches this point */
4416     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4417     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4418     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4419   }
4420 
4421   /* free memory */
4422   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4423   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4429 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4430 {
4431   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4432   PC_IS*         pcis = (PC_IS*)pc->data;
4433   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4434   PetscInt       i,coarse_size=0;
4435   PetscInt       *local_primal_indices=NULL;
4436   PetscErrorCode ierr;
4437 
4438   PetscFunctionBegin;
4439   /* Compute global number of coarse dofs */
4440   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4441     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4442   }
4443   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4444 
4445   /* check numbering */
4446   if (pcbddc->dbg_flag) {
4447     PetscScalar coarsesum,*array;
4448     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4449 
4450     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4451     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4452     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4453     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4454     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4455     for (i=0;i<pcbddc->local_primal_size;i++) {
4456       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4457     }
4458     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4459     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4460     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4461     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4462     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4463     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4464     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4465     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4466     for (i=0;i<pcis->n;i++) {
4467       if (array[i] == 1.0) {
4468         set_error = PETSC_TRUE;
4469         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4470       }
4471     }
4472     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4473     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4474     for (i=0;i<pcis->n;i++) {
4475       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4476     }
4477     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4478     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4479     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4480     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4481     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4482     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4483     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4484       PetscInt *gidxs;
4485 
4486       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4487       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4489       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4490       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4491       for (i=0;i<pcbddc->local_primal_size;i++) {
4492         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4493       }
4494       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4495       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4496     }
4497     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4498     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4499   }
4500   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4501   /* get back data */
4502   *coarse_size_n = coarse_size;
4503   *local_primal_indices_n = local_primal_indices;
4504   PetscFunctionReturn(0);
4505 }
4506 
4507 #undef __FUNCT__
4508 #define __FUNCT__ "PCBDDCGlobalToLocal"
4509 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4510 {
4511   IS             localis_t;
4512   PetscInt       i,lsize,*idxs,n;
4513   PetscScalar    *vals;
4514   PetscErrorCode ierr;
4515 
4516   PetscFunctionBegin;
4517   /* get indices in local ordering exploiting local to global map */
4518   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4519   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4520   for (i=0;i<lsize;i++) vals[i] = 1.0;
4521   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4522   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4523   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4524   if (idxs) { /* multilevel guard */
4525     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4526   }
4527   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4528   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4529   ierr = PetscFree(vals);CHKERRQ(ierr);
4530   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4531   /* now compute set in local ordering */
4532   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4533   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4534   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4535   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4536   for (i=0,lsize=0;i<n;i++) {
4537     if (PetscRealPart(vals[i]) > 0.5) {
4538       lsize++;
4539     }
4540   }
4541   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4542   for (i=0,lsize=0;i<n;i++) {
4543     if (PetscRealPart(vals[i]) > 0.5) {
4544       idxs[lsize++] = i;
4545     }
4546   }
4547   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4548   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4549   *localis = localis_t;
4550   PetscFunctionReturn(0);
4551 }
4552 
4553 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4554 #undef __FUNCT__
4555 #define __FUNCT__ "PCBDDCMatMult_Private"
4556 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4557 {
4558   PCBDDCChange_ctx change_ctx;
4559   PetscErrorCode   ierr;
4560 
4561   PetscFunctionBegin;
4562   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4563   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4564   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4565   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4566   PetscFunctionReturn(0);
4567 }
4568 
4569 #undef __FUNCT__
4570 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4571 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4572 {
4573   PCBDDCChange_ctx change_ctx;
4574   PetscErrorCode   ierr;
4575 
4576   PetscFunctionBegin;
4577   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4578   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4580   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4581   PetscFunctionReturn(0);
4582 }
4583 
4584 #undef __FUNCT__
4585 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4586 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4587 {
4588   PC_IS               *pcis=(PC_IS*)pc->data;
4589   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4590   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4591   Mat                 S_j;
4592   PetscInt            *used_xadj,*used_adjncy;
4593   PetscBool           free_used_adj;
4594   PetscErrorCode      ierr;
4595 
4596   PetscFunctionBegin;
4597   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4598   free_used_adj = PETSC_FALSE;
4599   if (pcbddc->sub_schurs_layers == -1) {
4600     used_xadj = NULL;
4601     used_adjncy = NULL;
4602   } else {
4603     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4604       used_xadj = pcbddc->mat_graph->xadj;
4605       used_adjncy = pcbddc->mat_graph->adjncy;
4606     } else if (pcbddc->computed_rowadj) {
4607       used_xadj = pcbddc->mat_graph->xadj;
4608       used_adjncy = pcbddc->mat_graph->adjncy;
4609     } else {
4610       PetscBool      flg_row=PETSC_FALSE;
4611       const PetscInt *xadj,*adjncy;
4612       PetscInt       nvtxs;
4613 
4614       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4615       if (flg_row) {
4616         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4617         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4618         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4619         free_used_adj = PETSC_TRUE;
4620       } else {
4621         pcbddc->sub_schurs_layers = -1;
4622         used_xadj = NULL;
4623         used_adjncy = NULL;
4624       }
4625       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4626     }
4627   }
4628 
4629   /* setup sub_schurs data */
4630   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4631   if (!sub_schurs->use_mumps) {
4632     /* pcbddc->ksp_D up to date only if not using MUMPS */
4633     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4634     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4635   } else {
4636     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4637   }
4638   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4639 
4640   /* free adjacency */
4641   if (free_used_adj) {
4642     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4643   }
4644   PetscFunctionReturn(0);
4645 }
4646 
4647 #undef __FUNCT__
4648 #define __FUNCT__ "PCBDDCInitSubSchurs"
4649 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4650 {
4651   PC_IS               *pcis=(PC_IS*)pc->data;
4652   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4653   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4654   PCBDDCGraph         graph;
4655   PetscErrorCode      ierr;
4656 
4657   PetscFunctionBegin;
4658   /* attach interface graph for determining subsets */
4659   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4660     IS verticesIS;
4661 
4662     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4663     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4664     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4665     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4666     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4667     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4668 /*
4669     if (pcbddc->dbg_flag) {
4670       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4671     }
4672 */
4673   } else {
4674     graph = pcbddc->mat_graph;
4675   }
4676 
4677   /* sub_schurs init */
4678   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4679 
4680   /* free graph struct */
4681   if (pcbddc->sub_schurs_rebuild) {
4682     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4683   }
4684   PetscFunctionReturn(0);
4685 }
4686