xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 64efe5603a168e716b384ec40569cfa1ac3811f6)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscBool         boolforchange,qr_needed;
1774   PetscBT           touched,change_basis,qr_needed_idx;
1775   /* auxiliary stuff */
1776   PetscInt          *nnz,*is_indices;
1777   PetscInt          ncc;
1778   /* some quantities */
1779   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1780   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1781 
1782   PetscFunctionBegin;
1783   /* Destroy Mat objects computed previously */
1784   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1785   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1786   /* save info on constraints from previous setup (if any) */
1787   olocal_primal_size = pcbddc->local_primal_size;
1788   ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
1789   ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
1790   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1791 
1792   /* print some info */
1793   if (pcbddc->dbg_flag) {
1794     IS       vertices;
1795     PetscInt nv,nedges,nfaces;
1796     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1797     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1798     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1799     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1801     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1805   }
1806 
1807   if (!pcbddc->adaptive_selection) {
1808     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1809     MatNullSpace nearnullsp;
1810     const Vec    *nearnullvecs;
1811     Vec          *localnearnullsp;
1812     PetscScalar  *array;
1813     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1814     PetscBool    nnsp_has_cnst;
1815     /* LAPACK working arrays for SVD or POD */
1816     PetscBool    skip_lapack;
1817     PetscScalar  *work;
1818     PetscReal    *singular_vals;
1819 #if defined(PETSC_USE_COMPLEX)
1820     PetscReal    *rwork;
1821 #endif
1822 #if defined(PETSC_MISSING_LAPACK_GESVD)
1823     PetscScalar  *temp_basis,*correlation_mat;
1824 #else
1825     PetscBLASInt dummy_int=1;
1826     PetscScalar  dummy_scalar=1.;
1827 #endif
1828 
1829     /* Get index sets for faces, edges and vertices from graph */
1830     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1831     /* free unneeded index sets */
1832     if (!pcbddc->use_vertices) {
1833       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1834     }
1835     if (!pcbddc->use_edges) {
1836       for (i=0;i<n_ISForEdges;i++) {
1837         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1838       }
1839       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1840       n_ISForEdges = 0;
1841     }
1842     if (!pcbddc->use_faces) {
1843       for (i=0;i<n_ISForFaces;i++) {
1844         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1845       }
1846       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1847       n_ISForFaces = 0;
1848     }
1849     /* HACKS (the following two blocks of code) */
1850     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1851       pcbddc->use_change_of_basis = PETSC_TRUE;
1852       if (!ISForEdges) {
1853         pcbddc->use_change_on_faces = PETSC_TRUE;
1854       }
1855     }
1856     if (pcbddc->NullSpace) {
1857       /* use_change_of_basis should be consistent among processors */
1858       PetscBool tbool[2],gbool[2];
1859       tbool[0] = pcbddc->use_change_of_basis;
1860       tbool[1] = pcbddc->use_change_on_faces;
1861       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1862       pcbddc->use_change_of_basis = gbool[0];
1863       pcbddc->use_change_on_faces = gbool[1];
1864     }
1865 
1866     /* check if near null space is attached to global mat */
1867     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1868     if (nearnullsp) {
1869       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1870       /* remove any stored info */
1871       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1872       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1873       /* store information for BDDC solver reuse */
1874       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1875       pcbddc->onearnullspace = nearnullsp;
1876       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1877       for (i=0;i<nnsp_size;i++) {
1878         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1879       }
1880     } else { /* if near null space is not provided BDDC uses constants by default */
1881       nnsp_size = 0;
1882       nnsp_has_cnst = PETSC_TRUE;
1883     }
1884     /* get max number of constraints on a single cc */
1885     max_constraints = nnsp_size;
1886     if (nnsp_has_cnst) max_constraints++;
1887 
1888     /*
1889          Evaluate maximum storage size needed by the procedure
1890          - temp_indices will contain start index of each constraint stored as follows
1891          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1892          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1893          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1894                                                                                                                                                            */
1895     total_counts = n_ISForFaces+n_ISForEdges;
1896     total_counts *= max_constraints;
1897     n_vertices = 0;
1898     if (ISForVertices) {
1899       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1900     }
1901     total_counts += n_vertices;
1902     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1903     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1904     total_counts = 0;
1905     max_size_of_constraint = 0;
1906     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1907       if (i<n_ISForEdges) {
1908         used_IS = &ISForEdges[i];
1909       } else {
1910         used_IS = &ISForFaces[i-n_ISForEdges];
1911       }
1912       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1913       total_counts += j;
1914       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1915     }
1916     total_counts *= max_constraints;
1917     total_counts += n_vertices;
1918     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1919     /* get local part of global near null space vectors */
1920     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1921     for (k=0;k<nnsp_size;k++) {
1922       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1923       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1924       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1925     }
1926 
1927     /* whether or not to skip lapack calls */
1928     skip_lapack = PETSC_TRUE;
1929     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1930 
1931     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1932     if (!skip_lapack) {
1933       PetscScalar temp_work;
1934 
1935 #if defined(PETSC_MISSING_LAPACK_GESVD)
1936       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1937       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1938       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1939       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1940 #if defined(PETSC_USE_COMPLEX)
1941       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1942 #endif
1943       /* now we evaluate the optimal workspace using query with lwork=-1 */
1944       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1945       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1946       lwork = -1;
1947       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1948 #if !defined(PETSC_USE_COMPLEX)
1949       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1950 #else
1951       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1952 #endif
1953       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1954       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1955 #else /* on missing GESVD */
1956       /* SVD */
1957       PetscInt max_n,min_n;
1958       max_n = max_size_of_constraint;
1959       min_n = max_constraints;
1960       if (max_size_of_constraint < max_constraints) {
1961         min_n = max_size_of_constraint;
1962         max_n = max_constraints;
1963       }
1964       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1965 #if defined(PETSC_USE_COMPLEX)
1966       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1967 #endif
1968       /* now we evaluate the optimal workspace using query with lwork=-1 */
1969       lwork = -1;
1970       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1971       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1972       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1973       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1974 #if !defined(PETSC_USE_COMPLEX)
1975       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1976 #else
1977       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1978 #endif
1979       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1980       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1981 #endif /* on missing GESVD */
1982       /* Allocate optimal workspace */
1983       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1984       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1985     }
1986     /* Now we can loop on constraining sets */
1987     total_counts = 0;
1988     temp_indices[0] = 0;
1989     /* vertices */
1990     if (ISForVertices) {
1991       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1992       if (nnsp_has_cnst) { /* consider all vertices */
1993         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1994         for (i=0;i<n_vertices;i++) {
1995           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1996           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1997           total_counts++;
1998         }
1999       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2000         PetscBool used_vertex;
2001         for (i=0;i<n_vertices;i++) {
2002           used_vertex = PETSC_FALSE;
2003           k = 0;
2004           while (!used_vertex && k<nnsp_size) {
2005             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2006             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2007               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2008               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2009               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2010               total_counts++;
2011               used_vertex = PETSC_TRUE;
2012             }
2013             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2014             k++;
2015           }
2016         }
2017       }
2018       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2019       n_vertices = total_counts;
2020     }
2021 
2022     /* edges and faces */
2023     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2024       if (ncc<n_ISForEdges) {
2025         used_IS = &ISForEdges[ncc];
2026         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2027       } else {
2028         used_IS = &ISForFaces[ncc-n_ISForEdges];
2029         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2030       }
2031       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2032       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2033       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2034       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2035       /* change of basis should not be performed on local periodic nodes */
2036       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2037       if (nnsp_has_cnst) {
2038         PetscScalar quad_value;
2039         temp_constraints++;
2040         if (!pcbddc->use_nnsp_true) {
2041           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2042         } else {
2043           quad_value = 1.0;
2044         }
2045         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2046         for (j=0;j<size_of_constraint;j++) {
2047           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2048         }
2049         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2050         total_counts++;
2051       }
2052       for (k=0;k<nnsp_size;k++) {
2053         PetscReal real_value;
2054         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2055         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2056         for (j=0;j<size_of_constraint;j++) {
2057           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2058         }
2059         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2060         /* check if array is null on the connected component */
2061         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2062         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2063         if (real_value > 0.0) { /* keep indices and values */
2064           temp_constraints++;
2065           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2066           total_counts++;
2067         }
2068       }
2069       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2070       valid_constraints = temp_constraints;
2071       if (!pcbddc->use_nnsp_true && temp_constraints) {
2072         if (temp_constraints == 1) { /* just normalize the constraint */
2073           PetscScalar norm;
2074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2075           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2076           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2077           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2078         } else { /* perform SVD */
2079           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2080 
2081 #if defined(PETSC_MISSING_LAPACK_GESVD)
2082           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2083              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2084              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2085                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2086                 from that computed using LAPACKgesvd
2087              -> This is due to a different computation of eigenvectors in LAPACKheev
2088              -> The quality of the POD-computed basis will be the same */
2089           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2090           /* Store upper triangular part of correlation matrix */
2091           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2092           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2093           for (j=0;j<temp_constraints;j++) {
2094             for (k=0;k<j+1;k++) {
2095               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2096             }
2097           }
2098           /* compute eigenvalues and eigenvectors of correlation matrix */
2099           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2100           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2101 #if !defined(PETSC_USE_COMPLEX)
2102           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2103 #else
2104           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2105 #endif
2106           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2107           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2108           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2109           j = 0;
2110           while (j < temp_constraints && singular_vals[j] < tol) j++;
2111           total_counts = total_counts-j;
2112           valid_constraints = temp_constraints-j;
2113           /* scale and copy POD basis into used quadrature memory */
2114           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2115           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2116           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2117           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2119           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2120           if (j<temp_constraints) {
2121             PetscInt ii;
2122             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2123             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2124             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2125             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2126             for (k=0;k<temp_constraints-j;k++) {
2127               for (ii=0;ii<size_of_constraint;ii++) {
2128                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2129               }
2130             }
2131           }
2132 #else  /* on missing GESVD */
2133           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2134           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2135           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2136           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2137 #if !defined(PETSC_USE_COMPLEX)
2138           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2139 #else
2140           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2141 #endif
2142           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2143           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2144           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2145           k = temp_constraints;
2146           if (k > size_of_constraint) k = size_of_constraint;
2147           j = 0;
2148           while (j < k && singular_vals[k-j-1] < tol) j++;
2149           valid_constraints = k-j;
2150           total_counts = total_counts-temp_constraints+valid_constraints;
2151 #endif /* on missing GESVD */
2152         }
2153       }
2154       /* setting change_of_basis flag is safe now */
2155       if (boolforchange) {
2156         for (j=0;j<valid_constraints;j++) {
2157           PetscBTSet(change_basis,total_counts-j-1);
2158         }
2159       }
2160     }
2161     /* free workspace */
2162     if (!skip_lapack) {
2163       ierr = PetscFree(work);CHKERRQ(ierr);
2164 #if defined(PETSC_USE_COMPLEX)
2165       ierr = PetscFree(rwork);CHKERRQ(ierr);
2166 #endif
2167       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2168 #if defined(PETSC_MISSING_LAPACK_GESVD)
2169       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2170       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2171 #endif
2172     }
2173     for (k=0;k<nnsp_size;k++) {
2174       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2175     }
2176     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2177     /* free index sets of faces, edges and vertices */
2178     for (i=0;i<n_ISForFaces;i++) {
2179       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2180     }
2181     if (n_ISForFaces) {
2182       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2183     }
2184     for (i=0;i<n_ISForEdges;i++) {
2185       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2186     }
2187     if (n_ISForEdges) {
2188       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2189     }
2190     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2191   } else {
2192     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2193     PetscInt        cum = 0;
2194 
2195     total_counts = 0;
2196     n_vertices = 0;
2197     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2198       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2199     }
2200     max_constraints = 0;
2201     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2202       total_counts += pcbddc->adaptive_constraints_n[i];
2203       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2204     }
2205     temp_indices = pcbddc->adaptive_constraints_ptrs;
2206     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2207     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2208 
2209 #if 0
2210     printf("Found %d totals\n",total_counts);
2211     for (i=0;i<total_counts;i++) {
2212       printf("const %d, start %d",i,temp_indices[i]);
2213       printf(" end %d:\n",temp_indices[i+1]);
2214       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2215         printf("  idxs %d",temp_indices_to_constraint[j]);
2216         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2217       }
2218     }
2219     for (i=0;i<n_vertices;i++) {
2220       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2221     }
2222     for (i=0;i<sub_schurs->n_subs;i++) {
2223       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2224     }
2225 #endif
2226 
2227     max_size_of_constraint = 0;
2228     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2229     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2230     /* Change of basis */
2231     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2232     if (pcbddc->use_change_of_basis) {
2233       cum = n_vertices;
2234       for (i=0;i<sub_schurs->n_subs;i++) {
2235         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2236           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2237             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2238           }
2239         }
2240         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2241       }
2242     }
2243   }
2244 
2245   /* map temp_indices_to_constraint in boundary numbering */
2246   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2247   if (i != temp_indices[total_counts]) {
2248     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2249   }
2250 
2251   /* set quantities in pcbddc data structure and store previous primal size */
2252   /* n_vertices defines the number of subdomain corners in the primal space */
2253   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2254   pcbddc->local_primal_size = total_counts;
2255   pcbddc->n_vertices = n_vertices;
2256   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2257 
2258   /* Create constraint matrix */
2259   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2260   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2261   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2262   /* array to compute a local numbering of constraints : vertices first then constraints */
2263   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2264   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2265 
2266   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2267   total_primal_vertices=0;
2268   for (i=0;i<pcbddc->local_primal_size;i++) {
2269     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2270     if (size_of_constraint == 1) {
2271       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2272       pcbddc->primal_indices_local_idxs[total_primal_vertices++]=temp_indices_to_constraint[temp_indices[i]];
2273     } else if (PetscBTLookup(change_basis,i)) {
2274       /* find first (by global ordering) untouched node in the cc */
2275       k = 0;
2276       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2277       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2278       pcbddc->primal_indices_local_idxs[total_primal_vertices++]=temp_indices_to_constraint[temp_indices[i]+k];
2279     }
2280   }
2281   pcbddc->n_actual_vertices = total_primal_vertices;
2282   /* determine if a QR strategy is needed for change of basis */
2283   qr_needed = PETSC_FALSE;
2284   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2285   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2286     if (PetscBTLookup(change_basis,i)) {
2287       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2288         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2289         j = 0;
2290         for (k=0;k<size_of_constraint;k++) {
2291           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2292             j++;
2293           }
2294         }
2295         /* found more than one primal dof on the cc */
2296         if (j > 1) {
2297           PetscBTSet(qr_needed_idx,i);
2298           qr_needed = PETSC_TRUE;
2299         }
2300       } else {
2301         PetscBTSet(qr_needed_idx,i);
2302         qr_needed = PETSC_TRUE;
2303       }
2304     }
2305   }
2306   total_counts = total_primal_vertices;
2307   for (i=0;i<pcbddc->local_primal_size;i++) {
2308     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2309     if (size_of_constraint != 1 && !PetscBTLookup(change_basis,i)) {
2310       /* find first (by global ordering) untouched node in the cc */
2311       k = 0;
2312       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2313       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+k]);CHKERRQ(ierr);
2314       pcbddc->primal_indices_local_idxs[total_counts++]=temp_indices_to_constraint[temp_indices[i]+k];
2315     }
2316   }
2317 
2318   /* permute indices in order to have a sorted set of vertices */
2319   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2320 
2321   /* nonzero structure of constraint matrix */
2322   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2323   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2324   j=total_primal_vertices;
2325   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2326     if (!PetscBTLookup(change_basis,i)) {
2327       nnz[j]=temp_indices[i+1]-temp_indices[i];
2328       j++;
2329     }
2330   }
2331   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2332   ierr = PetscFree(nnz);CHKERRQ(ierr);
2333 
2334   /* set values in constraint matrix */
2335   for (i=0;i<total_primal_vertices;i++) {
2336     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2337   }
2338   total_counts = total_primal_vertices;
2339   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2340     if (!PetscBTLookup(change_basis,i)) {
2341       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2342       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2343       total_counts++;
2344     }
2345   }
2346   /* assembling */
2347   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2349 
2350   /*
2351   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2352   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2353   */
2354   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2355   if (pcbddc->use_change_of_basis) {
2356     /* dual and primal dofs on a single cc */
2357     PetscInt     dual_dofs,primal_dofs;
2358     /* working stuff for GEQRF */
2359     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2360     PetscBLASInt lqr_work;
2361     /* working stuff for UNGQR */
2362     PetscScalar  *gqr_work,lgqr_work_t;
2363     PetscBLASInt lgqr_work;
2364     /* working stuff for TRTRS */
2365     PetscScalar  *trs_rhs;
2366     PetscBLASInt Blas_NRHS;
2367     /* pointers for values insertion into change of basis matrix */
2368     PetscInt     *start_rows,*start_cols;
2369     PetscScalar  *start_vals;
2370     /* working stuff for values insertion */
2371     PetscBT      is_primal;
2372     PetscInt     *aux_primal_numbering_B;
2373     /* matrix sizes */
2374     PetscInt     global_size,local_size;
2375     /* temporary change of basis */
2376     Mat          localChangeOfBasisMatrix;
2377     /* extra space for debugging */
2378     PetscScalar  *dbg_work;
2379 
2380     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2381     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2382     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2383     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2384     /* nonzeros for local mat */
2385     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2386     for (i=0;i<pcis->n;i++) nnz[i]=1;
2387     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2388       if (PetscBTLookup(change_basis,i)) {
2389         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2390         if (PetscBTLookup(qr_needed_idx,i)) {
2391           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2392         } else {
2393           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2394           /* get local primal index on the cc */
2395           j = 0;
2396           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2397           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2398         }
2399       }
2400     }
2401     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2402     ierr = PetscFree(nnz);CHKERRQ(ierr);
2403     /* Set initial identity in the matrix */
2404     for (i=0;i<pcis->n;i++) {
2405       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2406     }
2407 
2408     if (pcbddc->dbg_flag) {
2409       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2410       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2411     }
2412 
2413 
2414     /* Now we loop on the constraints which need a change of basis */
2415     /*
2416        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2417        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2418 
2419        Basic blocks of change of basis matrix T computed by
2420 
2421           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2422 
2423             | 1        0   ...        0         s_1/S |
2424             | 0        1   ...        0         s_2/S |
2425             |              ...                        |
2426             | 0        ...            1     s_{n-1}/S |
2427             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2428 
2429             with S = \sum_{i=1}^n s_i^2
2430             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2431                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2432 
2433           - QR decomposition of constraints otherwise
2434     */
2435     if (qr_needed) {
2436       /* space to store Q */
2437       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2438       /* first we issue queries for optimal work */
2439       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2440       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2441       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2442       lqr_work = -1;
2443       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2444       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2445       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2446       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2447       lgqr_work = -1;
2448       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2449       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2450       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2451       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2452       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2453       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2454       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2455       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2456       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2457       /* array to store scaling factors for reflectors */
2458       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2459       /* array to store rhs and solution of triangular solver */
2460       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2461       /* allocating workspace for check */
2462       if (pcbddc->dbg_flag) {
2463         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2464       }
2465     }
2466     /* array to store whether a node is primal or not */
2467     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2468     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2469     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->primal_indices_local_idxs,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2470     if (i != total_primal_vertices) {
2471       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2472     }
2473     for (i=0;i<total_primal_vertices;i++) {
2474       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2475     }
2476     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2477 
2478     /* loop on constraints and see whether or not they need a change of basis and compute it */
2479     /* -> using implicit ordering contained in temp_indices data */
2480     total_counts = pcbddc->n_vertices;
2481     while (total_counts<pcbddc->local_primal_size) {
2482       primal_dofs = 1;
2483       if (PetscBTLookup(change_basis,total_counts)) {
2484         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2485         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2486           primal_dofs++;
2487         }
2488         /* get constraint info */
2489         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2490         dual_dofs = size_of_constraint-primal_dofs;
2491 
2492         if (pcbddc->dbg_flag) {
2493           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2494         }
2495 
2496         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2497 
2498           /* copy quadrature constraints for change of basis check */
2499           if (pcbddc->dbg_flag) {
2500             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2501           }
2502           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2503           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2504 
2505           /* compute QR decomposition of constraints */
2506           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2507           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2508           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2509           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2510           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2511           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2512           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2513 
2514           /* explictly compute R^-T */
2515           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2516           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2517           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2518           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2521           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2522           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2523           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2524           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2525 
2526           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2527           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2528           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2529           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2530           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2531           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2532           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2533           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2534           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2535 
2536           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2537              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2538              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2539           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2540           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2541           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2542           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2543           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2544           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2545           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2546           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2547           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2548           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2549 
2550           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2551           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2552           /* insert cols for primal dofs */
2553           for (j=0;j<primal_dofs;j++) {
2554             start_vals = &qr_basis[j*size_of_constraint];
2555             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+j];
2556             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2557           }
2558           /* insert cols for dual dofs */
2559           for (j=0,k=0;j<dual_dofs;k++) {
2560             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2561               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2562               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2563               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2564               j++;
2565             }
2566           }
2567 
2568           /* check change of basis */
2569           if (pcbddc->dbg_flag) {
2570             PetscInt   ii,jj;
2571             PetscBool valid_qr=PETSC_TRUE;
2572             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2573             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2574             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2575             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2576             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2577             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2578             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2579             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2580             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2581             for (jj=0;jj<size_of_constraint;jj++) {
2582               for (ii=0;ii<primal_dofs;ii++) {
2583                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2584                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2585               }
2586             }
2587             if (!valid_qr) {
2588               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2589               for (jj=0;jj<size_of_constraint;jj++) {
2590                 for (ii=0;ii<primal_dofs;ii++) {
2591                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2592                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2593                   }
2594                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2595                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2596                   }
2597                 }
2598               }
2599             } else {
2600               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2601             }
2602           }
2603         } else { /* simple transformation block */
2604           PetscInt    row,col;
2605           PetscScalar val,norm;
2606 
2607           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2608           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2609           for (j=0;j<size_of_constraint;j++) {
2610             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2611             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2612             if (!PetscBTLookup(is_primal,row_B)) {
2613               col = temp_indices_to_constraint[temp_indices[total_counts]];
2614               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2615               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2616             } else {
2617               for (k=0;k<size_of_constraint;k++) {
2618                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2619                 if (row != col) {
2620                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]];
2621                 } else {
2622                   val = temp_quadrature_constraint[temp_indices[total_counts]]/norm;
2623                 }
2624                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2625               }
2626             }
2627           }
2628           if (pcbddc->dbg_flag) {
2629             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2630           }
2631         }
2632       } else {
2633         if (pcbddc->dbg_flag) {
2634           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2635         }
2636       }
2637       /* increment constraint counter total_counts */
2638       total_counts += primal_dofs;
2639     }
2640 
2641     /* free workspace */
2642     if (qr_needed) {
2643       if (pcbddc->dbg_flag) {
2644         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2645       }
2646       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2647       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2648       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2649       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2650       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2651     }
2652     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2653     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655 
2656     /* assembling of global change of variable */
2657     {
2658       Mat      tmat;
2659       PetscInt bs;
2660 
2661       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2662       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2663       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2664       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2665       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2666       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2667       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2668       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2669       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2670       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2671       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2672       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2673       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2674       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2675       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2676       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2677       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2678       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2679     }
2680     /* check */
2681     if (pcbddc->dbg_flag) {
2682       PetscReal error;
2683       Vec       x,x_change;
2684 
2685       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2686       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2687       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2688       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2689       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2692       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2693       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2694       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2695       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2696       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2697       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2698       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2699       ierr = VecDestroy(&x);CHKERRQ(ierr);
2700       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2701     }
2702 
2703     /* adapt sub_schurs computed (if any) */
2704     if (pcbddc->use_deluxe_scaling) {
2705       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2706       if (sub_schurs->S_Ej_all) {
2707         Mat S_new,tmat;
2708         IS is_all_N;
2709 
2710         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2711         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2712         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2713         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2714         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2715         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2716         sub_schurs->S_Ej_all = S_new;
2717         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2718         if (sub_schurs->sum_S_Ej_all) {
2719           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2720           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2721           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2722           sub_schurs->sum_S_Ej_all = S_new;
2723           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2724         }
2725         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2726       }
2727     }
2728     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2729   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2730     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2731     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2732   }
2733 
2734   /* set up change of basis context */
2735   if (pcbddc->ChangeOfBasisMatrix) {
2736     PCBDDCChange_ctx change_ctx;
2737 
2738     if (!pcbddc->new_global_mat) {
2739       PetscInt global_size,local_size;
2740 
2741       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2742       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2743       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2744       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2745       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2746       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2747       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2748       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2749       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2750     } else {
2751       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2752       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2753       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2754     }
2755     if (!pcbddc->user_ChangeOfBasisMatrix) {
2756       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2757       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2758     } else {
2759       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2760       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2761     }
2762     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2763     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2764     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766   }
2767 
2768   /* check if a new primal space has been introduced */
2769   pcbddc->new_primal_space_local = PETSC_TRUE;
2770   if (olocal_primal_size == pcbddc->local_primal_size) {
2771     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2772     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2773   }
2774   ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2775   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2776   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2777 
2778   /* flush dbg viewer */
2779   if (pcbddc->dbg_flag) {
2780     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2781   }
2782 
2783   /* free workspace */
2784   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2785   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2786   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2787   if (!pcbddc->adaptive_selection) {
2788     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2789     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2790   } else {
2791     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2792                       pcbddc->adaptive_constraints_ptrs,
2793                       pcbddc->adaptive_constraints_idxs,
2794                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2795     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2796   }
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2802 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2803 {
2804   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2805   PC_IS       *pcis = (PC_IS*)pc->data;
2806   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2807   PetscInt    ierr,i,vertex_size,N;
2808   PetscViewer viewer=pcbddc->dbg_viewer;
2809 
2810   PetscFunctionBegin;
2811   /* Reset previously computed graph */
2812   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2813   /* Init local Graph struct */
2814   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2815   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2816 
2817   /* Check validity of the csr graph passed in by the user */
2818   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2819     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2820   }
2821 
2822   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2823   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2824     PetscInt  *xadj,*adjncy;
2825     PetscInt  nvtxs;
2826     PetscBool flg_row=PETSC_FALSE;
2827 
2828     if (pcbddc->use_local_adj) {
2829 
2830       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2831       if (flg_row) {
2832         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2833         pcbddc->computed_rowadj = PETSC_TRUE;
2834       }
2835       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2836     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2837       IS                     is_dummy;
2838       ISLocalToGlobalMapping l2gmap_dummy;
2839       PetscInt               j,sum;
2840       PetscInt               *cxadj,*cadjncy;
2841       const PetscInt         *idxs;
2842       PCBDDCGraph            graph;
2843       PetscBT                is_on_boundary;
2844 
2845       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2846       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2847       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2848       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2849       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2850       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2851       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2852       if (flg_row) {
2853         graph->xadj = xadj;
2854         graph->adjncy = adjncy;
2855       }
2856       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2857       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2858       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2859 
2860       if (pcbddc->dbg_flag) {
2861         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2862         for (i=0;i<graph->ncc;i++) {
2863           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2864         }
2865       }
2866 
2867       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2868       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2869       for (i=0;i<pcis->n_B;i++) {
2870         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2871       }
2872       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2873 
2874       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2875       sum = 0;
2876       for (i=0;i<graph->ncc;i++) {
2877         PetscInt sizecc = 0;
2878         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2879           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2880             sizecc++;
2881           }
2882         }
2883         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2884           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2885             cxadj[graph->queue[j]] = sizecc;
2886           }
2887         }
2888         sum += sizecc*sizecc;
2889       }
2890       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2891       sum = 0;
2892       for (i=0;i<pcis->n;i++) {
2893         PetscInt temp = cxadj[i];
2894         cxadj[i] = sum;
2895         sum += temp;
2896       }
2897       cxadj[pcis->n] = sum;
2898       for (i=0;i<graph->ncc;i++) {
2899         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2900           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2901             PetscInt k,sizecc = 0;
2902             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2903               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2904                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2905                 sizecc++;
2906               }
2907             }
2908           }
2909         }
2910       }
2911       if (sum) {
2912         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2913       } else {
2914         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2915         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2916       }
2917       graph->xadj = 0;
2918       graph->adjncy = 0;
2919       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2920       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2921     }
2922   }
2923   if (pcbddc->dbg_flag) {
2924     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2925   }
2926 
2927   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2928   vertex_size = 1;
2929   if (pcbddc->user_provided_isfordofs) {
2930     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2931       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2932       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2933         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2934         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2935       }
2936       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2937       pcbddc->n_ISForDofs = 0;
2938       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2939     }
2940     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2941     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2942   } else {
2943     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2944       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2945       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2946       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2947         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2948       }
2949     }
2950   }
2951 
2952   /* Setup of Graph */
2953   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2954     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2955   }
2956   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2957     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2958   }
2959   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
2960 
2961   /* Graph's connected components analysis */
2962   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2963 
2964   /* print some info to stdout */
2965   if (pcbddc->dbg_flag) {
2966     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
2967   }
2968 
2969   /* mark topography has done */
2970   pcbddc->recompute_topography = PETSC_FALSE;
2971   PetscFunctionReturn(0);
2972 }
2973 
2974 #undef __FUNCT__
2975 #define __FUNCT__ "PCBDDCSubsetNumbering"
2976 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2977 {
2978   Vec            local_vec,global_vec;
2979   IS             seqis,paris;
2980   VecScatter     scatter_ctx;
2981   PetscScalar    *array;
2982   PetscInt       *temp_global_dofs;
2983   PetscScalar    globalsum;
2984   PetscInt       i,j,s;
2985   PetscInt       nlocals,first_index,old_index,max_local,max_global;
2986   PetscMPIInt    rank_prec_comm,size_prec_comm;
2987   PetscInt       *dof_sizes,*dof_displs;
2988   PetscBool      first_found;
2989   PetscErrorCode ierr;
2990 
2991   PetscFunctionBegin;
2992   /* mpi buffers */
2993   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
2994   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
2995   j = ( !rank_prec_comm ? size_prec_comm : 0);
2996   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
2997   /* get maximum size of subset */
2998   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
2999   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3000   max_local = 0;
3001   for (i=0;i<n_local_dofs;i++) {
3002     if (max_local < temp_global_dofs[i] ) {
3003       max_local = temp_global_dofs[i];
3004     }
3005   }
3006   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3007   max_global++;
3008   max_local = 0;
3009   for (i=0;i<n_local_dofs;i++) {
3010     if (max_local < local_dofs[i] ) {
3011       max_local = local_dofs[i];
3012     }
3013   }
3014   max_local++;
3015   /* allocate workspace */
3016   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3017   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3018   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3019   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3020   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3021   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3022   /* create scatter */
3023   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3024   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3025   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3026   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3027   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3028   /* init array */
3029   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3030   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3031   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3032   if (local_dofs_mult) {
3033     for (i=0;i<n_local_dofs;i++) {
3034       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3035     }
3036   } else {
3037     for (i=0;i<n_local_dofs;i++) {
3038       array[local_dofs[i]]=1.0;
3039     }
3040   }
3041   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3042   /* scatter into global vec and get total number of global dofs */
3043   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3044   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3045   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3046   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3047   /* Fill global_vec with cumulative function for global numbering */
3048   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3049   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3050   nlocals = 0;
3051   first_index = -1;
3052   first_found = PETSC_FALSE;
3053   for (i=0;i<s;i++) {
3054     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3055       first_found = PETSC_TRUE;
3056       first_index = i;
3057     }
3058     nlocals += (PetscInt)PetscRealPart(array[i]);
3059   }
3060   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3061   if (!rank_prec_comm) {
3062     dof_displs[0]=0;
3063     for (i=1;i<size_prec_comm;i++) {
3064       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3065     }
3066   }
3067   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3068   if (first_found) {
3069     array[first_index] += (PetscScalar)nlocals;
3070     old_index = first_index;
3071     for (i=first_index+1;i<s;i++) {
3072       if (PetscRealPart(array[i]) > 0.1) {
3073         array[i] += array[old_index];
3074         old_index = i;
3075       }
3076     }
3077   }
3078   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3079   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3080   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3081   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3082   /* get global ordering of local dofs */
3083   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3084   if (local_dofs_mult) {
3085     for (i=0;i<n_local_dofs;i++) {
3086       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3087     }
3088   } else {
3089     for (i=0;i<n_local_dofs;i++) {
3090       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3091     }
3092   }
3093   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3094   /* free workspace */
3095   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3096   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3097   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3098   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3099   /* return pointer to global ordering of local dofs */
3100   *global_numbering_subset = temp_global_dofs;
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #undef __FUNCT__
3105 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3106 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3107 {
3108   PetscInt       i,j;
3109   PetscScalar    *alphas;
3110   PetscErrorCode ierr;
3111 
3112   PetscFunctionBegin;
3113   /* this implements stabilized Gram-Schmidt */
3114   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3115   for (i=0;i<n;i++) {
3116     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3117     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3118     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3119   }
3120   ierr = PetscFree(alphas);CHKERRQ(ierr);
3121   PetscFunctionReturn(0);
3122 }
3123 
3124 #undef __FUNCT__
3125 #define __FUNCT__ "MatISGetSubassemblingPattern"
3126 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3127 {
3128   Mat             subdomain_adj;
3129   IS              new_ranks,ranks_send_to;
3130   MatPartitioning partitioner;
3131   Mat_IS          *matis;
3132   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3133   PetscInt        prank;
3134   PetscMPIInt     size,rank,color;
3135   PetscInt        *xadj,*adjncy,*oldranks;
3136   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3137   PetscInt        i,local_size,threshold=0;
3138   PetscErrorCode  ierr;
3139   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3140   PetscSubcomm    subcomm;
3141 
3142   PetscFunctionBegin;
3143   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3144   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3145   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3146 
3147   /* Get info on mapping */
3148   matis = (Mat_IS*)(mat->data);
3149   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3150   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3151 
3152   /* build local CSR graph of subdomains' connectivity */
3153   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3154   xadj[0] = 0;
3155   xadj[1] = PetscMax(n_neighs-1,0);
3156   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3157   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3158 
3159   if (threshold) {
3160     PetscInt xadj_count = 0;
3161     for (i=1;i<n_neighs;i++) {
3162       if (n_shared[i] > threshold) {
3163         adjncy[xadj_count] = neighs[i];
3164         adjncy_wgt[xadj_count] = n_shared[i];
3165         xadj_count++;
3166       }
3167     }
3168     xadj[1] = xadj_count;
3169   } else {
3170     if (xadj[1]) {
3171       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3172       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3173     }
3174   }
3175   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3176   if (use_square) {
3177     for (i=0;i<xadj[1];i++) {
3178       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3179     }
3180   }
3181   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3182 
3183   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3184 
3185   /*
3186     Restrict work on active processes only.
3187   */
3188   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3189   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3190   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3191   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3192   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3193   if (color) {
3194     ierr = PetscFree(xadj);CHKERRQ(ierr);
3195     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3196     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3197   } else {
3198     PetscInt coarsening_ratio;
3199     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3200     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3201     prank = rank;
3202     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3203     /*
3204     for (i=0;i<size;i++) {
3205       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3206     }
3207     */
3208     for (i=0;i<xadj[1];i++) {
3209       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3210     }
3211     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3212     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3213     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3214 
3215     /* Partition */
3216     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3217     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3218     if (use_vwgt) {
3219       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3220       v_wgt[0] = local_size;
3221       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3222     }
3223     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3224     coarsening_ratio = size/n_subdomains;
3225     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3226     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3227     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3228     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3229 
3230     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3231     if (contiguous) {
3232       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3233     } else {
3234       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3235     }
3236     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3237     /* clean up */
3238     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3239     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3240     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3241     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3242   }
3243   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3244 
3245   /* assemble parallel IS for sends */
3246   i = 1;
3247   if (color) i=0;
3248   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3249 
3250   /* get back IS */
3251   *is_sends = ranks_send_to;
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3256 
3257 #undef __FUNCT__
3258 #define __FUNCT__ "MatISSubassemble"
3259 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3260 {
3261   Mat                    local_mat;
3262   Mat_IS                 *matis;
3263   IS                     is_sends_internal;
3264   PetscInt               rows,cols,new_local_rows;
3265   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3266   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3267   ISLocalToGlobalMapping l2gmap;
3268   PetscInt*              l2gmap_indices;
3269   const PetscInt*        is_indices;
3270   MatType                new_local_type;
3271   /* buffers */
3272   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3273   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3274   PetscInt               *recv_buffer_idxs_local;
3275   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3276   /* MPI */
3277   MPI_Comm               comm,comm_n;
3278   PetscSubcomm           subcomm;
3279   PetscMPIInt            n_sends,n_recvs,commsize;
3280   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3281   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3282   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3283   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3284   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3285   PetscErrorCode         ierr;
3286 
3287   PetscFunctionBegin;
3288   /* TODO: add missing checks */
3289   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3290   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3291   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3292   PetscValidLogicalCollectiveInt(mat,nis,7);
3293   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3294   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3295   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3296   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3297   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3298   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3299   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3300   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3301     PetscInt mrows,mcols,mnrows,mncols;
3302     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3303     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3304     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3305     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3306     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3307     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3308   }
3309   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3310   PetscValidLogicalCollectiveInt(mat,bs,0);
3311   /* prepare IS for sending if not provided */
3312   if (!is_sends) {
3313     PetscBool pcontig = PETSC_TRUE;
3314     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3315     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3316   } else {
3317     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3318     is_sends_internal = is_sends;
3319   }
3320 
3321   /* get pointer of MATIS data */
3322   matis = (Mat_IS*)mat->data;
3323 
3324   /* get comm */
3325   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3326 
3327   /* compute number of sends */
3328   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3329   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3330 
3331   /* compute number of receives */
3332   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3333   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3334   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3335   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3336   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3337   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3338   ierr = PetscFree(iflags);CHKERRQ(ierr);
3339 
3340   /* restrict comm if requested */
3341   subcomm = 0;
3342   destroy_mat = PETSC_FALSE;
3343   if (restrict_comm) {
3344     PetscMPIInt color,subcommsize;
3345 
3346     color = 0;
3347     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3348     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3349     subcommsize = commsize - subcommsize;
3350     /* check if reuse has been requested */
3351     if (reuse == MAT_REUSE_MATRIX) {
3352       if (*mat_n) {
3353         PetscMPIInt subcommsize2;
3354         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3355         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3356         comm_n = PetscObjectComm((PetscObject)*mat_n);
3357       } else {
3358         comm_n = PETSC_COMM_SELF;
3359       }
3360     } else { /* MAT_INITIAL_MATRIX */
3361       PetscMPIInt rank;
3362 
3363       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3364       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3365       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3366       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3367       comm_n = PetscSubcommChild(subcomm);
3368     }
3369     /* flag to destroy *mat_n if not significative */
3370     if (color) destroy_mat = PETSC_TRUE;
3371   } else {
3372     comm_n = comm;
3373   }
3374 
3375   /* prepare send/receive buffers */
3376   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3377   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3378   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3379   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3380   if (nis) {
3381     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3382   }
3383 
3384   /* Get data from local matrices */
3385   if (!isdense) {
3386     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3387     /* TODO: See below some guidelines on how to prepare the local buffers */
3388     /*
3389        send_buffer_vals should contain the raw values of the local matrix
3390        send_buffer_idxs should contain:
3391        - MatType_PRIVATE type
3392        - PetscInt        size_of_l2gmap
3393        - PetscInt        global_row_indices[size_of_l2gmap]
3394        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3395     */
3396   } else {
3397     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3398     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3399     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3400     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3401     send_buffer_idxs[1] = i;
3402     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3403     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3404     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3405     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3406     for (i=0;i<n_sends;i++) {
3407       ilengths_vals[is_indices[i]] = len*len;
3408       ilengths_idxs[is_indices[i]] = len+2;
3409     }
3410   }
3411   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3412   /* additional is (if any) */
3413   if (nis) {
3414     PetscMPIInt psum;
3415     PetscInt j;
3416     for (j=0,psum=0;j<nis;j++) {
3417       PetscInt plen;
3418       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3419       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3420       psum += len+1; /* indices + lenght */
3421     }
3422     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3423     for (j=0,psum=0;j<nis;j++) {
3424       PetscInt plen;
3425       const PetscInt *is_array_idxs;
3426       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3427       send_buffer_idxs_is[psum] = plen;
3428       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3429       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3430       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3431       psum += plen+1; /* indices + lenght */
3432     }
3433     for (i=0;i<n_sends;i++) {
3434       ilengths_idxs_is[is_indices[i]] = psum;
3435     }
3436     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3437   }
3438 
3439   buf_size_idxs = 0;
3440   buf_size_vals = 0;
3441   buf_size_idxs_is = 0;
3442   for (i=0;i<n_recvs;i++) {
3443     buf_size_idxs += (PetscInt)olengths_idxs[i];
3444     buf_size_vals += (PetscInt)olengths_vals[i];
3445     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3446   }
3447   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3448   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3449   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3450 
3451   /* get new tags for clean communications */
3452   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3453   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3454   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3455 
3456   /* allocate for requests */
3457   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3458   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3459   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3460   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3461   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3462   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3463 
3464   /* communications */
3465   ptr_idxs = recv_buffer_idxs;
3466   ptr_vals = recv_buffer_vals;
3467   ptr_idxs_is = recv_buffer_idxs_is;
3468   for (i=0;i<n_recvs;i++) {
3469     source_dest = onodes[i];
3470     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3471     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3472     ptr_idxs += olengths_idxs[i];
3473     ptr_vals += olengths_vals[i];
3474     if (nis) {
3475       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3476       ptr_idxs_is += olengths_idxs_is[i];
3477     }
3478   }
3479   for (i=0;i<n_sends;i++) {
3480     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3481     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3482     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3483     if (nis) {
3484       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3485     }
3486   }
3487   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3488   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3489 
3490   /* assemble new l2g map */
3491   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3492   ptr_idxs = recv_buffer_idxs;
3493   new_local_rows = 0;
3494   for (i=0;i<n_recvs;i++) {
3495     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3496     ptr_idxs += olengths_idxs[i];
3497   }
3498   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3499   ptr_idxs = recv_buffer_idxs;
3500   new_local_rows = 0;
3501   for (i=0;i<n_recvs;i++) {
3502     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3503     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3504     ptr_idxs += olengths_idxs[i];
3505   }
3506   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3507   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3508   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3509 
3510   /* infer new local matrix type from received local matrices type */
3511   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3512   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3513   if (n_recvs) {
3514     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3515     ptr_idxs = recv_buffer_idxs;
3516     for (i=0;i<n_recvs;i++) {
3517       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3518         new_local_type_private = MATAIJ_PRIVATE;
3519         break;
3520       }
3521       ptr_idxs += olengths_idxs[i];
3522     }
3523     switch (new_local_type_private) {
3524       case MATDENSE_PRIVATE:
3525         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3526           new_local_type = MATSEQAIJ;
3527           bs = 1;
3528         } else { /* if I receive only 1 dense matrix */
3529           new_local_type = MATSEQDENSE;
3530           bs = 1;
3531         }
3532         break;
3533       case MATAIJ_PRIVATE:
3534         new_local_type = MATSEQAIJ;
3535         bs = 1;
3536         break;
3537       case MATBAIJ_PRIVATE:
3538         new_local_type = MATSEQBAIJ;
3539         break;
3540       case MATSBAIJ_PRIVATE:
3541         new_local_type = MATSEQSBAIJ;
3542         break;
3543       default:
3544         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3545         break;
3546     }
3547   } else { /* by default, new_local_type is seqdense */
3548     new_local_type = MATSEQDENSE;
3549     bs = 1;
3550   }
3551 
3552   /* create MATIS object if needed */
3553   if (reuse == MAT_INITIAL_MATRIX) {
3554     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3555     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3556   } else {
3557     /* it also destroys the local matrices */
3558     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3559   }
3560   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3561   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3562 
3563   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3564 
3565   /* Global to local map of received indices */
3566   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3567   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3568   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3569 
3570   /* restore attributes -> type of incoming data and its size */
3571   buf_size_idxs = 0;
3572   for (i=0;i<n_recvs;i++) {
3573     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3574     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3575     buf_size_idxs += (PetscInt)olengths_idxs[i];
3576   }
3577   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3578 
3579   /* set preallocation */
3580   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3581   if (!newisdense) {
3582     PetscInt *new_local_nnz=0;
3583 
3584     ptr_vals = recv_buffer_vals;
3585     ptr_idxs = recv_buffer_idxs_local;
3586     if (n_recvs) {
3587       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3588     }
3589     for (i=0;i<n_recvs;i++) {
3590       PetscInt j;
3591       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3592         for (j=0;j<*(ptr_idxs+1);j++) {
3593           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3594         }
3595       } else {
3596         /* TODO */
3597       }
3598       ptr_idxs += olengths_idxs[i];
3599     }
3600     if (new_local_nnz) {
3601       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3602       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3603       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3604       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3605       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3606       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3607     } else {
3608       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3609     }
3610     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3611   } else {
3612     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3613   }
3614 
3615   /* set values */
3616   ptr_vals = recv_buffer_vals;
3617   ptr_idxs = recv_buffer_idxs_local;
3618   for (i=0;i<n_recvs;i++) {
3619     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3620       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3621       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3622       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3623       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3624       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3625     } else {
3626       /* TODO */
3627     }
3628     ptr_idxs += olengths_idxs[i];
3629     ptr_vals += olengths_vals[i];
3630   }
3631   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3636   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3637 
3638 #if 0
3639   if (!restrict_comm) { /* check */
3640     Vec       lvec,rvec;
3641     PetscReal infty_error;
3642 
3643     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3644     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3645     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3646     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3647     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3648     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3649     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3650     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3651     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3652   }
3653 #endif
3654 
3655   /* assemble new additional is (if any) */
3656   if (nis) {
3657     PetscInt **temp_idxs,*count_is,j,psum;
3658 
3659     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3660     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3661     ptr_idxs = recv_buffer_idxs_is;
3662     psum = 0;
3663     for (i=0;i<n_recvs;i++) {
3664       for (j=0;j<nis;j++) {
3665         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3666         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3667         psum += plen;
3668         ptr_idxs += plen+1; /* shift pointer to received data */
3669       }
3670     }
3671     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3672     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3673     for (i=1;i<nis;i++) {
3674       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3675     }
3676     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3677     ptr_idxs = recv_buffer_idxs_is;
3678     for (i=0;i<n_recvs;i++) {
3679       for (j=0;j<nis;j++) {
3680         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3681         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3682         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3683         ptr_idxs += plen+1; /* shift pointer to received data */
3684       }
3685     }
3686     for (i=0;i<nis;i++) {
3687       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3688       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3689       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3690     }
3691     ierr = PetscFree(count_is);CHKERRQ(ierr);
3692     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3693     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3694   }
3695   /* free workspace */
3696   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3697   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3698   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3699   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3700   if (isdense) {
3701     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3702     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3703   } else {
3704     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3705   }
3706   if (nis) {
3707     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3708     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3709   }
3710   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3711   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3712   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3713   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3714   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3715   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3716   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3717   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3718   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3719   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3720   ierr = PetscFree(onodes);CHKERRQ(ierr);
3721   if (nis) {
3722     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3723     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3724     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3725   }
3726   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3727   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3728     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3729     for (i=0;i<nis;i++) {
3730       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3731     }
3732   }
3733   PetscFunctionReturn(0);
3734 }
3735 
3736 /* temporary hack into ksp private data structure */
3737 #include <petsc/private/kspimpl.h>
3738 
3739 #undef __FUNCT__
3740 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3741 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3742 {
3743   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3744   PC_IS                  *pcis = (PC_IS*)pc->data;
3745   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3746   MatNullSpace           CoarseNullSpace=NULL;
3747   ISLocalToGlobalMapping coarse_islg;
3748   IS                     coarse_is,*isarray;
3749   PetscInt               i,im_active=-1,active_procs=-1;
3750   PetscInt               nis,nisdofs,nisneu;
3751   PC                     pc_temp;
3752   PCType                 coarse_pc_type;
3753   KSPType                coarse_ksp_type;
3754   PetscBool              multilevel_requested,multilevel_allowed;
3755   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3756   Mat                    t_coarse_mat_is;
3757   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3758   PetscMPIInt            all_procs;
3759   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3760   PetscBool              compute_vecs = PETSC_FALSE;
3761   PetscScalar            *array;
3762   PetscErrorCode         ierr;
3763 
3764   PetscFunctionBegin;
3765   /* Assign global numbering to coarse dofs */
3766   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3767     PetscInt ocoarse_size;
3768     compute_vecs = PETSC_TRUE;
3769     ocoarse_size = pcbddc->coarse_size;
3770     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3771     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3772     /* see if we can avoid some work */
3773     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3774       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3775         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3776         coarse_reuse = PETSC_FALSE;
3777       } else { /* we can safely reuse already computed coarse matrix */
3778         coarse_reuse = PETSC_TRUE;
3779       }
3780     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3781       coarse_reuse = PETSC_FALSE;
3782     }
3783     /* reset any subassembling information */
3784     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3785     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3786   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3787     coarse_reuse = PETSC_TRUE;
3788   }
3789 
3790   /* count "active" (i.e. with positive local size) and "void" processes */
3791   im_active = !!(pcis->n);
3792   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3793   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3794   void_procs = all_procs-active_procs;
3795   csin_type_simple = PETSC_TRUE;
3796   redist = PETSC_FALSE;
3797   if (pcbddc->current_level && void_procs) {
3798     csin_ml = PETSC_TRUE;
3799     ncoarse_ml = void_procs;
3800     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3801     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3802       csin_ds = PETSC_TRUE;
3803       ncoarse_ds = pcbddc->redistribute_coarse;
3804       redist = PETSC_TRUE;
3805     } else {
3806       csin_ds = PETSC_TRUE;
3807       ncoarse_ds = active_procs;
3808       redist = PETSC_TRUE;
3809     }
3810   } else {
3811     csin_ml = PETSC_FALSE;
3812     ncoarse_ml = all_procs;
3813     if (void_procs) {
3814       csin_ds = PETSC_TRUE;
3815       ncoarse_ds = void_procs;
3816       csin_type_simple = PETSC_FALSE;
3817     } else {
3818       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3819         csin_ds = PETSC_TRUE;
3820         ncoarse_ds = pcbddc->redistribute_coarse;
3821         redist = PETSC_TRUE;
3822       } else {
3823         csin_ds = PETSC_FALSE;
3824         ncoarse_ds = all_procs;
3825       }
3826     }
3827   }
3828 
3829   /*
3830     test if we can go multilevel: three conditions must be satisfied:
3831     - we have not exceeded the number of levels requested
3832     - we can actually subassemble the active processes
3833     - we can find a suitable number of MPI processes where we can place the subassembled problem
3834   */
3835   multilevel_allowed = PETSC_FALSE;
3836   multilevel_requested = PETSC_FALSE;
3837   if (pcbddc->current_level < pcbddc->max_levels) {
3838     multilevel_requested = PETSC_TRUE;
3839     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3840       multilevel_allowed = PETSC_FALSE;
3841     } else {
3842       multilevel_allowed = PETSC_TRUE;
3843     }
3844   }
3845   /* determine number of process partecipating to coarse solver */
3846   if (multilevel_allowed) {
3847     ncoarse = ncoarse_ml;
3848     csin = csin_ml;
3849     redist = PETSC_FALSE;
3850   } else {
3851     ncoarse = ncoarse_ds;
3852     csin = csin_ds;
3853   }
3854 
3855   /* creates temporary l2gmap and IS for coarse indexes */
3856   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3857   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3858 
3859   /* creates temporary MATIS object for coarse matrix */
3860   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3861   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3862   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3863   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3864 #if 0
3865   {
3866     PetscViewer viewer;
3867     char filename[256];
3868     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3869     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3870     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3871     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3872     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3873   }
3874 #endif
3875   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3876   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3877   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3880 
3881   /* compute dofs splitting and neumann boundaries for coarse dofs */
3882   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3883     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3884     const PetscInt         *idxs;
3885     ISLocalToGlobalMapping tmap;
3886 
3887     /* create map between primal indices (in local representative ordering) and local primal numbering */
3888     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3889     /* allocate space for temporary storage */
3890     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3891     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3892     /* allocate for IS array */
3893     nisdofs = pcbddc->n_ISForDofsLocal;
3894     nisneu = !!pcbddc->NeumannBoundariesLocal;
3895     nis = nisdofs + nisneu;
3896     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3897     /* dofs splitting */
3898     for (i=0;i<nisdofs;i++) {
3899       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3900       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3901       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3902       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3903       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3904       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3905       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3906       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3907     }
3908     /* neumann boundaries */
3909     if (pcbddc->NeumannBoundariesLocal) {
3910       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3911       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3912       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3913       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3914       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3915       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3916       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3917       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3918     }
3919     /* free memory */
3920     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3921     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3922     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3923   } else {
3924     nis = 0;
3925     nisdofs = 0;
3926     nisneu = 0;
3927     isarray = NULL;
3928   }
3929   /* destroy no longer needed map */
3930   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3931 
3932   /* restrict on coarse candidates (if needed) */
3933   coarse_mat_is = NULL;
3934   if (csin) {
3935     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
3936       if (redist) {
3937         PetscMPIInt rank;
3938         PetscInt    spc,n_spc_p1,dest[1],destsize;
3939 
3940         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3941         spc = active_procs/ncoarse;
3942         n_spc_p1 = active_procs%ncoarse;
3943         if (im_active) {
3944           destsize = 1;
3945           if (rank > n_spc_p1*(spc+1)-1) {
3946             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
3947           } else {
3948             dest[0] = rank/(spc+1);
3949           }
3950         } else {
3951           destsize = 0;
3952         }
3953         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3954       } else if (csin_type_simple) {
3955         PetscMPIInt rank;
3956         PetscInt    issize,isidx;
3957 
3958         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
3959         if (im_active) {
3960           issize = 1;
3961           isidx = (PetscInt)rank;
3962         } else {
3963           issize = 0;
3964           isidx = -1;
3965         }
3966         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3967       } else { /* get a suitable subassembling pattern from MATIS code */
3968         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3969       }
3970 
3971       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
3972       if (!redist || ncoarse <= void_procs) {
3973         PetscInt ncoarse_cand,tissize,*nisindices;
3974         PetscInt *coarse_candidates;
3975         const PetscInt* tisindices;
3976 
3977         /* get coarse candidates' ranks in pc communicator */
3978         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
3979         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3980         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
3981           if (!coarse_candidates[i]) {
3982             coarse_candidates[ncoarse_cand++]=i;
3983           }
3984         }
3985         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
3986 
3987 
3988         if (pcbddc->dbg_flag) {
3989           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3990           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
3991           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
3992           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
3993           for (i=0;i<ncoarse_cand;i++) {
3994             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
3995           }
3996           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
3997           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3998         }
3999         /* shift the pattern on coarse candidates */
4000         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4001         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4002         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4003         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4004         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4005         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4006         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4007       }
4008       if (pcbddc->dbg_flag) {
4009         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4010         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4011         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4012         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4013       }
4014     }
4015     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4016     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4017   } else {
4018     if (pcbddc->dbg_flag) {
4019       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4020       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4021       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4022     }
4023     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4024     coarse_mat_is = t_coarse_mat_is;
4025   }
4026 
4027   /* create local to global scatters for coarse problem */
4028   if (compute_vecs) {
4029     PetscInt lrows;
4030     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4031     if (coarse_mat_is) {
4032       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4033     } else {
4034       lrows = 0;
4035     }
4036     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4037     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4038     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4039     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4040     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4041   }
4042   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4043   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4044 
4045   /* set defaults for coarse KSP and PC */
4046   if (multilevel_allowed) {
4047     coarse_ksp_type = KSPRICHARDSON;
4048     coarse_pc_type = PCBDDC;
4049   } else {
4050     coarse_ksp_type = KSPPREONLY;
4051     coarse_pc_type = PCREDUNDANT;
4052   }
4053 
4054   /* print some info if requested */
4055   if (pcbddc->dbg_flag) {
4056     if (!multilevel_allowed) {
4057       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4058       if (multilevel_requested) {
4059         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4060       } else if (pcbddc->max_levels) {
4061         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4062       }
4063       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4064     }
4065   }
4066 
4067   /* create the coarse KSP object only once with defaults */
4068   if (coarse_mat_is) {
4069     MatReuse coarse_mat_reuse;
4070     PetscViewer dbg_viewer = NULL;
4071     if (pcbddc->dbg_flag) {
4072       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4073       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4074     }
4075     if (!pcbddc->coarse_ksp) {
4076       char prefix[256],str_level[16];
4077       size_t len;
4078       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4079       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4080       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4081       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4082       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4083       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4084       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4085       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4086       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4087       /* prefix */
4088       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4089       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4090       if (!pcbddc->current_level) {
4091         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4092         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4093       } else {
4094         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4095         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4096         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4097         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4098         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4099         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4100       }
4101       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4102       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4103       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4104       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4105       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4106       /* allow user customization */
4107       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4108     }
4109     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4110     if (nisdofs) {
4111       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4112       for (i=0;i<nisdofs;i++) {
4113         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4114       }
4115     }
4116     if (nisneu) {
4117       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4118       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4119     }
4120 
4121     /* get some info after set from options */
4122     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4123     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4124     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4125     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4126     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4127       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4128       isbddc = PETSC_FALSE;
4129     }
4130     if (isredundant) {
4131       KSP inner_ksp;
4132       PC inner_pc;
4133       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4134       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4135       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4136     }
4137 
4138     /* assemble coarse matrix */
4139     if (coarse_reuse) {
4140       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4141       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4142       coarse_mat_reuse = MAT_REUSE_MATRIX;
4143     } else {
4144       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4145     }
4146     if (isbddc || isnn) {
4147       if (pcbddc->coarsening_ratio > 1) {
4148         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4149           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4150           if (pcbddc->dbg_flag) {
4151             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4152             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4153             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4154             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4155           }
4156         }
4157         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4158       } else {
4159         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4160         coarse_mat = coarse_mat_is;
4161       }
4162     } else {
4163       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4164     }
4165     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4166 
4167     /* propagate symmetry info to coarse matrix */
4168     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4169     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4170 
4171     /* set operators */
4172     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4173     if (pcbddc->dbg_flag) {
4174       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4175     }
4176   } else { /* processes non partecipating to coarse solver (if any) */
4177     coarse_mat = 0;
4178   }
4179   ierr = PetscFree(isarray);CHKERRQ(ierr);
4180 #if 0
4181   {
4182     PetscViewer viewer;
4183     char filename[256];
4184     sprintf(filename,"coarse_mat.m");
4185     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4186     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4187     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4188     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4189   }
4190 #endif
4191 
4192   /* Compute coarse null space (special handling by BDDC only) */
4193   if (pcbddc->NullSpace) {
4194     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4195   }
4196 
4197   if (pcbddc->coarse_ksp) {
4198     Vec crhs,csol;
4199     PetscBool ispreonly;
4200     if (CoarseNullSpace) {
4201       if (isbddc) {
4202         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4203       } else {
4204         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4205       }
4206     }
4207     /* setup coarse ksp */
4208     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4209     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4210     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4211     /* hack */
4212     if (!csol) {
4213       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4214     }
4215     if (!crhs) {
4216       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4217     }
4218     /* Check coarse problem if in debug mode or if solving with an iterative method */
4219     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4220     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4221       KSP       check_ksp;
4222       KSPType   check_ksp_type;
4223       PC        check_pc;
4224       Vec       check_vec,coarse_vec;
4225       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4226       PetscInt  its;
4227       PetscBool compute_eigs;
4228       PetscReal *eigs_r,*eigs_c;
4229       PetscInt  neigs;
4230       const char *prefix;
4231 
4232       /* Create ksp object suitable for estimation of extreme eigenvalues */
4233       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4234       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4235       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4236       if (ispreonly) {
4237         check_ksp_type = KSPPREONLY;
4238         compute_eigs = PETSC_FALSE;
4239       } else {
4240         check_ksp_type = KSPGMRES;
4241         compute_eigs = PETSC_TRUE;
4242       }
4243       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4244       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4245       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4246       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4247       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4248       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4249       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4250       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4251       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4252       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4253       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4254       /* create random vec */
4255       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4256       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4257       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4258       if (CoarseNullSpace) {
4259         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4260       }
4261       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4262       /* solve coarse problem */
4263       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4264       if (CoarseNullSpace) {
4265         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4266       }
4267       /* set eigenvalue estimation if preonly has not been requested */
4268       if (compute_eigs) {
4269         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4270         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4271         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4272         lambda_max = eigs_r[neigs-1];
4273         lambda_min = eigs_r[0];
4274         if (pcbddc->use_coarse_estimates) {
4275           if (lambda_max>lambda_min) {
4276             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4277             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4278           }
4279         }
4280       }
4281 
4282       /* check coarse problem residual error */
4283       if (pcbddc->dbg_flag) {
4284         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4285         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4286         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4287         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4288         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4289         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4290         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4291         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4292         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4293         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4294         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4295         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4296         if (compute_eigs) {
4297           PetscReal lambda_max_s,lambda_min_s;
4298           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4299           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4300           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4301           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4302           for (i=0;i<neigs;i++) {
4303             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4304           }
4305         }
4306         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4307         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4308       }
4309       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4310       if (compute_eigs) {
4311         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4312         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4313       }
4314     }
4315   }
4316   /* print additional info */
4317   if (pcbddc->dbg_flag) {
4318     /* waits until all processes reaches this point */
4319     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4320     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4321     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4322   }
4323 
4324   /* free memory */
4325   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4326   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 #undef __FUNCT__
4331 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4332 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4333 {
4334   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4335   PC_IS*         pcis = (PC_IS*)pc->data;
4336   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4337   PetscInt       i,coarse_size=0;
4338   PetscInt       *local_primal_indices=NULL;
4339   PetscErrorCode ierr;
4340 
4341   PetscFunctionBegin;
4342   /* Compute global number of coarse dofs */
4343   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4344     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4345   }
4346   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4347 
4348   /* check numbering */
4349   if (pcbddc->dbg_flag) {
4350     PetscScalar coarsesum,*array;
4351     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4352 
4353     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4354     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4355     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4356     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4357     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4358     for (i=0;i<pcbddc->local_primal_size;i++) {
4359       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4360     }
4361     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4362     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4363     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4364     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4365     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4366     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4367     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4368     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4369     for (i=0;i<pcis->n;i++) {
4370       if (array[i] == 1.0) {
4371         set_error = PETSC_TRUE;
4372         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4373       }
4374     }
4375     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4376     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4377     for (i=0;i<pcis->n;i++) {
4378       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4379     }
4380     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4381     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4382     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4383     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4384     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4385     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4386     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4387       PetscInt *gidxs;
4388 
4389       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4390       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4391       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4392       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4393       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4394       for (i=0;i<pcbddc->local_primal_size;i++) {
4395         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4396       }
4397       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4398       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4399     }
4400     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4401     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4402   }
4403   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4404   /* get back data */
4405   *coarse_size_n = coarse_size;
4406   *local_primal_indices_n = local_primal_indices;
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #undef __FUNCT__
4411 #define __FUNCT__ "PCBDDCGlobalToLocal"
4412 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4413 {
4414   IS             localis_t;
4415   PetscInt       i,lsize,*idxs,n;
4416   PetscScalar    *vals;
4417   PetscErrorCode ierr;
4418 
4419   PetscFunctionBegin;
4420   /* get indices in local ordering exploiting local to global map */
4421   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4422   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4423   for (i=0;i<lsize;i++) vals[i] = 1.0;
4424   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4425   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4426   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4427   if (idxs) { /* multilevel guard */
4428     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4429   }
4430   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4431   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4432   ierr = PetscFree(vals);CHKERRQ(ierr);
4433   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4434   /* now compute set in local ordering */
4435   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4436   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4437   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4438   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4439   for (i=0,lsize=0;i<n;i++) {
4440     if (PetscRealPart(vals[i]) > 0.5) {
4441       lsize++;
4442     }
4443   }
4444   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4445   for (i=0,lsize=0;i<n;i++) {
4446     if (PetscRealPart(vals[i]) > 0.5) {
4447       idxs[lsize++] = i;
4448     }
4449   }
4450   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4451   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4452   *localis = localis_t;
4453   PetscFunctionReturn(0);
4454 }
4455 
4456 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4457 #undef __FUNCT__
4458 #define __FUNCT__ "PCBDDCMatMult_Private"
4459 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4460 {
4461   PCBDDCChange_ctx change_ctx;
4462   PetscErrorCode   ierr;
4463 
4464   PetscFunctionBegin;
4465   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4466   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4467   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4468   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4474 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4475 {
4476   PCBDDCChange_ctx change_ctx;
4477   PetscErrorCode   ierr;
4478 
4479   PetscFunctionBegin;
4480   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4481   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4482   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4483   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4489 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4490 {
4491   PC_IS               *pcis=(PC_IS*)pc->data;
4492   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4493   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4494   Mat                 S_j;
4495   PetscInt            *used_xadj,*used_adjncy;
4496   PetscBool           free_used_adj;
4497   PetscErrorCode      ierr;
4498 
4499   PetscFunctionBegin;
4500   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4501   free_used_adj = PETSC_FALSE;
4502   if (pcbddc->sub_schurs_layers == -1) {
4503     used_xadj = NULL;
4504     used_adjncy = NULL;
4505   } else {
4506     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4507       used_xadj = pcbddc->mat_graph->xadj;
4508       used_adjncy = pcbddc->mat_graph->adjncy;
4509     } else if (pcbddc->computed_rowadj) {
4510       used_xadj = pcbddc->mat_graph->xadj;
4511       used_adjncy = pcbddc->mat_graph->adjncy;
4512     } else {
4513       PetscBool      flg_row=PETSC_FALSE;
4514       const PetscInt *xadj,*adjncy;
4515       PetscInt       nvtxs;
4516 
4517       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4518       if (flg_row) {
4519         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4520         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4521         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4522         free_used_adj = PETSC_TRUE;
4523       } else {
4524         pcbddc->sub_schurs_layers = -1;
4525         used_xadj = NULL;
4526         used_adjncy = NULL;
4527       }
4528       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4529     }
4530   }
4531 
4532   /* setup sub_schurs data */
4533   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4534   if (!sub_schurs->use_mumps) {
4535     /* pcbddc->ksp_D up to date only if not using MUMPS */
4536     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4537     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4538   } else {
4539     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4540   }
4541   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4542 
4543   /* free adjacency */
4544   if (free_used_adj) {
4545     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4546   }
4547   PetscFunctionReturn(0);
4548 }
4549 
4550 #undef __FUNCT__
4551 #define __FUNCT__ "PCBDDCInitSubSchurs"
4552 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4553 {
4554   PC_IS               *pcis=(PC_IS*)pc->data;
4555   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4556   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4557   PCBDDCGraph         graph;
4558   PetscErrorCode      ierr;
4559 
4560   PetscFunctionBegin;
4561   /* attach interface graph for determining subsets */
4562   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4563     IS verticesIS;
4564 
4565     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4566     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4567     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4568     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4569     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4570     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4571 /*
4572     if (pcbddc->dbg_flag) {
4573       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4574     }
4575 */
4576   } else {
4577     graph = pcbddc->mat_graph;
4578   }
4579 
4580   /* sub_schurs init */
4581   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4582 
4583   /* free graph struct */
4584   if (pcbddc->sub_schurs_rebuild) {
4585     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4586   }
4587   PetscFunctionReturn(0);
4588 }
4589