xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision aff50787371bd8f420eccc56824b469d42890d04)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size;
76       cum2 += subset_size*n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
112     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
124                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
125                       nv+cum,&pcbddc->adaptive_constraints_idxs,
126                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
127   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
128 
129   maxneigs = 0;
130   cum = cum2 = cumarray = 0;
131   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
132   pcbddc->adaptive_constraints_data_ptr[0] = 0;
133   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
134     const PetscInt *idxs;
135 
136     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
137     for (cum=0;cum<nv;cum++) {
138       pcbddc->adaptive_constraints_n[cum] = 1;
139       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
140       pcbddc->adaptive_constraints_data[cum] = 1.0;
141       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
142       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
143     }
144     cum2 = cum;
145     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
146   }
147 
148   if (mss) { /* multilevel */
149     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
150     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
151   }
152 
153   for (i=0;i<sub_schurs->n_subs;i++) {
154     PetscInt j,subset_size;
155 
156     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
157     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
158       const PetscInt *idxs;
159       PetscReal      infty = PETSC_MAX_REAL;
160       PetscInt       eigs_start = 0;
161       PetscBLASInt   B_N;
162       PetscBool      same_data = PETSC_FALSE;
163 
164       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
165       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
166         if (sub_schurs->is_hermitian) {
167           PetscInt j,k;
168           if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
169             ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
170             ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           }
172           for (j=0;j<subset_size;j++) {
173             for (k=j;k<subset_size;k++) {
174               S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
175               St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
176             }
177           }
178         } else {
179           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
180           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
181         }
182       } else {
183         S = Sarray + cumarray;
184         St = Starray + cumarray;
185       }
186 
187       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
188       /* see if we can save some work */
189       if (sub_schurs->n_subs == 1) {
190         ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
191       }
192 
193       if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
194         PetscPrintf(PETSC_COMM_SELF,"[%d] SAME DATA!!!!!!!!!!\n",PetscGlobalRank);
195         B_neigs = 0;
196       } else {
197         /* Threshold: this is an heuristic for edges */
198         thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
199 
200         if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
201           PetscBLASInt B_itype = 1;
202           PetscBLASInt B_IL, B_IU;
203           PetscReal    eps = -1.0; /* dlamch? */
204           PetscInt     nmin_s;
205 
206           /* ask for eigenvalues larger than thresh */
207           if (pcbddc->dbg_flag) {
208             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
209           }
210           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
211 #if defined(PETSC_USE_COMPLEX)
212           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
213 #else
214           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
215 #endif
216           ierr = PetscFPTrapPop();CHKERRQ(ierr);
217           if (B_ierr) {
218             if (B_ierr < 0 ) {
219               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
220             } else if (B_ierr <= B_N) {
221               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
222             } else {
223               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
224             }
225           }
226 
227           if (B_neigs > nmax) {
228             if (pcbddc->dbg_flag) {
229               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
230             }
231             eigs_start = B_neigs -nmax;
232             B_neigs = nmax;
233           }
234 
235           nmin_s = PetscMin(nmin,B_N);
236           if (B_neigs < nmin_s) {
237             PetscBLASInt B_neigs2;
238 
239             B_IU = B_N - B_neigs;
240             B_IL = B_N - nmin_s + 1;
241             if (pcbddc->dbg_flag) {
242               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
243             }
244             if (sub_schurs->is_hermitian) {
245               PetscInt j;
246               for (j=0;j<subset_size;j++) {
247                 ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
248               }
249               for (j=0;j<subset_size;j++) {
250                 ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
251               }
252             } else {
253               ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254               ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255             }
256             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
257 #if defined(PETSC_USE_COMPLEX)
258             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
259 #else
260             PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
261 #endif
262             ierr = PetscFPTrapPop();CHKERRQ(ierr);
263             B_neigs += B_neigs2;
264           }
265           if (B_ierr) {
266             if (B_ierr < 0 ) {
267               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
268             } else if (B_ierr <= B_N) {
269               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
270             } else {
271               SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
272             }
273           }
274           if (pcbddc->dbg_flag) {
275             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
276             for (j=0;j<B_neigs;j++) {
277               if (eigs[j] == 0.0) {
278                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
279               } else {
280                 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
281               }
282             }
283           }
284         } else {
285             /* TODO */
286         }
287       }
288       maxneigs = PetscMax(B_neigs,maxneigs);
289       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
290       if (B_neigs) {
291         ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293         if (pcbddc->dbg_flag > 1) {
294           PetscInt ii;
295           for (ii=0;ii<B_neigs;ii++) {
296             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
297             for (j=0;j<B_N;j++) {
298               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
299             }
300           }
301         }
302 #if 0
303         for (j=0;j<B_neigs;j++) {
304           PetscBLASInt Blas_N,Blas_one = 1.0;
305           PetscScalar norm;
306           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
307           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
308                                                      &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
309           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
310             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
311           } else {
312             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
313           }
314           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
315         }
316 #endif
317         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
318         pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
319         pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
320         cum++;
321       }
322       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
323     }
324     /* shift for next computation */
325     cumarray += subset_size*subset_size;
326   }
327   if (pcbddc->dbg_flag) {
328     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
329   }
330 
331   if (mss) {
332     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
333     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
334     /* destroy matrices (junk) */
335     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
336     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
337   }
338   if (allocated_S_St) {
339     ierr = PetscFree2(S,St);CHKERRQ(ierr);
340   }
341   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
342 #if defined(PETSC_USE_COMPLEX)
343   ierr = PetscFree(rwork);CHKERRQ(ierr);
344 #endif
345   if (pcbddc->dbg_flag) {
346     PetscInt maxneigs_r;
347     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
348     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
349   }
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "PCBDDCSetUpSolvers"
355 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
356 {
357   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
358   PetscScalar    *coarse_submat_vals;
359   PetscErrorCode ierr;
360 
361   PetscFunctionBegin;
362   /* Setup local scatters R_to_B and (optionally) R_to_D */
363   /* PCBDDCSetUpLocalWorkVectors should be called first! */
364   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
365 
366   /* Setup local neumann solver ksp_R */
367   /* PCBDDCSetUpLocalScatters should be called first! */
368   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
369 
370   /* Change global null space passed in by the user if change of basis has been requested */
371   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
372     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
373   }
374 
375   /*
376      Setup local correction and local part of coarse basis.
377      Gives back the dense local part of the coarse matrix in column major ordering
378   */
379   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
380 
381   /* Compute total number of coarse nodes and setup coarse solver */
382   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
383 
384   /* free */
385   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "PCBDDCResetCustomization"
391 PetscErrorCode PCBDDCResetCustomization(PC pc)
392 {
393   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
394   PetscErrorCode ierr;
395 
396   PetscFunctionBegin;
397   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
398   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
399   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
401   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
402   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
403   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
404   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
405   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
406   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
407   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "PCBDDCResetTopography"
413 PetscErrorCode PCBDDCResetTopography(PC pc)
414 {
415   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
421   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
422   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
423   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "PCBDDCResetSolvers"
429 PetscErrorCode PCBDDCResetSolvers(PC pc)
430 {
431   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
432   PetscScalar    *array;
433   PetscErrorCode ierr;
434 
435   PetscFunctionBegin;
436   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
437   if (pcbddc->coarse_phi_B) {
438     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
439     ierr = PetscFree(array);CHKERRQ(ierr);
440   }
441   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
442   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
444   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
445   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
446   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
447   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
448   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
449   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
450   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
451   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
452   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
453   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
454   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
455   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
456   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
457   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
458   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
459   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
460   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
461   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
462   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
463   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
464   PetscFunctionReturn(0);
465 }
466 
467 #undef __FUNCT__
468 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
469 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
470 {
471   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
472   PC_IS          *pcis = (PC_IS*)pc->data;
473   VecType        impVecType;
474   PetscInt       n_constraints,n_R,old_size;
475   PetscErrorCode ierr;
476 
477   PetscFunctionBegin;
478   if (!pcbddc->ConstraintMatrix) {
479     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
480   }
481   /* get sizes */
482   n_constraints = pcbddc->local_primal_size - pcbddc->n_vertices;
483   n_R = pcis->n-pcbddc->n_vertices;
484   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
485   /* local work vectors (try to avoid unneeded work)*/
486   /* R nodes */
487   old_size = -1;
488   if (pcbddc->vec1_R) {
489     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
490   }
491   if (n_R != old_size) {
492     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
493     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
494     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
495     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
496     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
497     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
498   }
499   /* local primal dofs */
500   old_size = -1;
501   if (pcbddc->vec1_P) {
502     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
503   }
504   if (pcbddc->local_primal_size != old_size) {
505     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
506     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
507     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
508     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
509   }
510   /* local explicit constraints */
511   old_size = -1;
512   if (pcbddc->vec1_C) {
513     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
514   }
515   if (n_constraints && n_constraints != old_size) {
516     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
517     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
518     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
519     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "PCBDDCSetUpCorrection"
526 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
527 {
528   PetscErrorCode  ierr;
529   /* pointers to pcis and pcbddc */
530   PC_IS*          pcis = (PC_IS*)pc->data;
531   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
532   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
533   /* submatrices of local problem */
534   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
535   /* submatrices of local coarse problem */
536   Mat             S_VV,S_CV,S_VC,S_CC;
537   /* working matrices */
538   Mat             C_CR;
539   /* additional working stuff */
540   PC              pc_R;
541   Mat             F;
542   PetscBool       isLU,isCHOL,isILU;
543 
544   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
545   PetscScalar     *work;
546   PetscInt        *idx_V_B;
547   PetscInt        n,n_vertices,n_constraints;
548   PetscInt        i,n_R,n_D,n_B;
549   PetscBool       unsymmetric_check;
550   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
551   MatType         impMatType;
552   /* some shortcuts to scalars */
553   PetscScalar     one=1.0,m_one=-1.0;
554 
555   PetscFunctionBegin;
556   n_vertices = pcbddc->n_vertices;
557   n_constraints = pcbddc->local_primal_size-n_vertices;
558   /* Set Non-overlapping dimensions */
559   n_B = pcis->n_B;
560   n_D = pcis->n - n_B;
561   n_R = pcis->n - n_vertices;
562 
563   /* Set types for local objects needed by BDDC precondtioner */
564   impMatType = MATSEQDENSE;
565 
566   /* vertices in boundary numbering */
567   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
568   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
569   if (i != n_vertices) {
570     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
571   }
572 
573   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
574   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
575   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
576   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
577   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
578   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
579   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
580   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
581   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
582   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
583 
584   unsymmetric_check = PETSC_FALSE;
585   /* allocate workspace */
586   n = 0;
587   if (n_constraints) {
588     n += n_R*n_constraints;
589   }
590   if (n_vertices) {
591     n = PetscMax(2*n_R*n_vertices,n);
592     n = PetscMax((n_R+n_B)*n_vertices,n);
593   }
594   if (!pcbddc->issym) {
595     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
596     unsymmetric_check = PETSC_TRUE;
597   }
598   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
599 
600   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
601   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
602   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
603   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
604   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
605   if (isLU || isILU || isCHOL) {
606     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
607   } else if (sub_schurs->reuse_mumps) {
608     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
609     MatFactorType type;
610 
611     F = reuse_mumps->F;
612     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
613     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
614   } else {
615     F = NULL;
616   }
617 
618   /* Precompute stuffs needed for preprocessing and application of BDDC*/
619   if (n_constraints) {
620     Mat         M1,M2,M3;
621     Mat         auxmat;
622     IS          is_aux;
623     PetscScalar *array,*array2;
624 
625     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
626     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
627 
628     /* Extract constraints on R nodes: C_{CR}  */
629     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
630     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
631     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
632 
633     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
634     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
635     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
636     for (i=0;i<n_constraints;i++) {
637       const PetscScalar *row_cmat_values;
638       const PetscInt    *row_cmat_indices;
639       PetscInt          size_of_constraint,j;
640 
641       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
642       for (j=0;j<size_of_constraint;j++) {
643         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
644       }
645       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
646     }
647     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
648     if (F) {
649       Mat B;
650 
651       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
652       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
653       ierr = MatDestroy(&B);CHKERRQ(ierr);
654     } else {
655       PetscScalar *marr;
656 
657       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
658       for (i=0;i<n_constraints;i++) {
659         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
660         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
661         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
662         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
663         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
664       }
665       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
666     }
667     if (!pcbddc->switch_static) {
668       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
669       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
670       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
671       for (i=0;i<n_constraints;i++) {
672         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
673         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
674         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
676         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
677         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
678       }
679       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
680       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
681       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
682     } else {
683       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
684       pcbddc->local_auxmat2 = local_auxmat2_R;
685       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
686     }
687     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
688     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
689     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
690     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
691     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
692     if (isCHOL) {
693       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
694     } else {
695       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
696     }
697     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
698     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
699     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
700     ierr = MatDestroy(&M2);CHKERRQ(ierr);
701     ierr = MatDestroy(&M3);CHKERRQ(ierr);
702     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
703     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
704     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
705     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
706     ierr = MatDestroy(&M1);CHKERRQ(ierr);
707   }
708   /* Get submatrices from subdomain matrix */
709   if (n_vertices) {
710     Mat       newmat;
711     IS        is_aux;
712     PetscInt  ibs,mbs;
713     PetscBool issbaij;
714 
715     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
716       IS tis;
717 
718       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
719       ierr = ISSort(tis);CHKERRQ(ierr);
720       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
721       ierr = ISDestroy(&tis);CHKERRQ(ierr);
722     } else {
723       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
724     }
725     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
726     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
727     if (ibs != mbs) { /* need to convert to SEQAIJ */
728       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
729       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
730       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
731       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
732       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
733       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
734     } else {
735       /* this is safe */
736       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
737       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
738       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
739         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
740         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
741         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
742         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
743         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
744         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
745       } else {
746         ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
747         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
748         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
749       }
750       /* need to convert to SeqAIJ for MatMatMult with dense matrix */
751       ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
752     }
753     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
754   }
755 
756   /* Matrix of coarse basis functions (local) */
757   if (pcbddc->coarse_phi_B) {
758     PetscInt on_B,on_primal,on_D=n_D;
759     if (pcbddc->coarse_phi_D) {
760       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
761     }
762     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
763     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
764       PetscScalar *marray;
765 
766       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
767       ierr = PetscFree(marray);CHKERRQ(ierr);
768       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
769       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
770       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
771       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
772     }
773   }
774 
775   if (!pcbddc->coarse_phi_B) {
776     PetscScalar *marray;
777 
778     n = n_B*pcbddc->local_primal_size;
779     if (pcbddc->switch_static || pcbddc->dbg_flag) {
780       n += n_D*pcbddc->local_primal_size;
781     }
782     if (!pcbddc->issym) {
783       n *= 2;
784     }
785     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
786     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
787     n = n_B*pcbddc->local_primal_size;
788     if (pcbddc->switch_static || pcbddc->dbg_flag) {
789       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
790       n += n_D*pcbddc->local_primal_size;
791     }
792     if (!pcbddc->issym) {
793       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
794       if (pcbddc->switch_static || pcbddc->dbg_flag) {
795         n = n_B*pcbddc->local_primal_size;
796         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
797       }
798     } else {
799       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
800       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
801       if (pcbddc->switch_static || pcbddc->dbg_flag) {
802         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
803         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
804       }
805     }
806   }
807   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
808   /* vertices */
809   if (n_vertices) {
810 
811     if (n_R) {
812       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
813       PetscBLASInt B_N,B_one = 1;
814       PetscScalar  *x,*y;
815 
816       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
817       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
818       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
819       if (F) { /* TODO could be optimized for symmetric problems */
820         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
821       } else {
822         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
823         for (i=0;i<n_vertices;i++) {
824           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
825           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
826           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
827           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
828           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
829         }
830         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
831       }
832       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
833       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
834       if (n_constraints) {
835         Mat B;
836 
837         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
838         for (i=0;i<n_vertices;i++) {
839           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
840           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
841           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
844           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
845         }
846         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
847         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
848         ierr = MatDestroy(&B);CHKERRQ(ierr);
849         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
850         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
851         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
852         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
853         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
854         ierr = MatDestroy(&B);CHKERRQ(ierr);
855       }
856       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
857       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
858       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
859       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
860       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
861       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
862       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
863       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
864       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
865       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
866     } else {
867       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
868     }
869     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
870     /* coarse basis functions */
871     for (i=0;i<n_vertices;i++) {
872       PetscScalar *y;
873 
874       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
875       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
876       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
877       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879       y[n_B*i+idx_V_B[i]] = 1.0;
880       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
881       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
882 
883       if (pcbddc->switch_static || pcbddc->dbg_flag) {
884         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
885         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
886         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
889         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
890       }
891       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
892     }
893   }
894 
895   if (n_constraints) {
896     Mat B;
897 
898     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
899     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
900     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
901     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
902     if (n_vertices) {
903       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
904         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
905       } else {
906         Mat S_VCt;
907 
908         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
909         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
910         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
911       }
912     }
913     ierr = MatDestroy(&B);CHKERRQ(ierr);
914     /* coarse basis functions */
915     for (i=0;i<n_constraints;i++) {
916       PetscScalar *y;
917 
918       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
919       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
920       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
921       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
924       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
925       if (pcbddc->switch_static || pcbddc->dbg_flag) {
926         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
927         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
928         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
931         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
932       }
933       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
934     }
935   }
936   if (n_constraints) {
937     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
938   }
939 
940   /* compute other basis functions for non-symmetric problems */
941   if (!pcbddc->issym) {
942 
943     if (n_constraints) {
944       Mat S_CCT,B_C;
945 
946       /* this is a lazy thing */
947       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
948       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
949       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
950       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
951       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
952       if (n_vertices) {
953         Mat B_V,S_VCT;
954 
955         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
956         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
957         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
958         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
959         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
960       }
961       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
962     } else { /* if there are no constraints, reset work */
963       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
964     }
965     if (n_vertices && n_R) {
966       Mat          A_VRT;
967       PetscScalar  *marray;
968       PetscBLASInt B_N,B_one = 1;
969 
970       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
971       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
972       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
973       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
974       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
975       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
976       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
977     }
978 
979     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
980       for (i=0;i<pcbddc->local_primal_size;i++) {
981         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
982         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
983         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
984         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
985         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
986       }
987     } else {
988       for (i=0;i<pcbddc->local_primal_size;i++) {
989         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
990         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
991         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
992         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
993         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
994       }
995     }
996     /* coarse basis functions */
997     for (i=0;i<pcbddc->local_primal_size;i++) {
998       PetscScalar *y;
999 
1000       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1001       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1002       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1003       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005       if (i<n_vertices) {
1006         y[n_B*i+idx_V_B[i]] = 1.0;
1007       }
1008       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1009       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1010 
1011       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1012         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1013         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1014         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1017         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1018       }
1019       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1020     }
1021   }
1022   /* free memory */
1023   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1024   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1025   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1026   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1027   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1028   ierr = PetscFree(work);CHKERRQ(ierr);
1029   if (n_vertices) {
1030     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1031   }
1032   if (n_constraints) {
1033     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1034   }
1035   /* Checking coarse_sub_mat and coarse basis functios */
1036   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1037   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1038   if (pcbddc->dbg_flag) {
1039     Mat         coarse_sub_mat;
1040     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1041     Mat         coarse_phi_D,coarse_phi_B;
1042     Mat         coarse_psi_D,coarse_psi_B;
1043     Mat         A_II,A_BB,A_IB,A_BI;
1044     Mat         C_B,CPHI;
1045     IS          is_dummy;
1046     Vec         mones;
1047     MatType     checkmattype=MATSEQAIJ;
1048     PetscReal   real_value;
1049 
1050     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1051     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1052     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1053     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1054     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1055     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1056     if (unsymmetric_check) {
1057       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1058       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1059     }
1060     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1061 
1062     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1063     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
1064     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1065     if (unsymmetric_check) {
1066       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1067       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1068       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1069       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1070       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1071       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1072       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1073       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1074       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1075       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1076       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1077       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1078     } else {
1079       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1080       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1081       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1082       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1083       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1084       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1085       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1086       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1087     }
1088     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1089     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1090     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1091     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1092     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1093     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1094     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1095     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1096 
1097     /* check constraints */
1098     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1099     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1100     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1101     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1102     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1103     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1104     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1105     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1106     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1107     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1108     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1109     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1110 
1111     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1112     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1113     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1114     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1115     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1116     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1117     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1118     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1119     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1120     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1121     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1122     if (unsymmetric_check) {
1123       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1124       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1125     }
1126     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1127   }
1128   /* get back data */
1129   *coarse_submat_vals_n = coarse_submat_vals;
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1135 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1136 {
1137   Mat            *work_mat;
1138   IS             isrow_s,iscol_s;
1139   PetscBool      rsorted,csorted;
1140   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1141   PetscErrorCode ierr;
1142 
1143   PetscFunctionBegin;
1144   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1145   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1146   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1147   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1148 
1149   if (!rsorted) {
1150     const PetscInt *idxs;
1151     PetscInt *idxs_sorted,i;
1152 
1153     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1154     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1155     for (i=0;i<rsize;i++) {
1156       idxs_perm_r[i] = i;
1157     }
1158     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1159     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1160     for (i=0;i<rsize;i++) {
1161       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1162     }
1163     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1164     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1165   } else {
1166     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1167     isrow_s = isrow;
1168   }
1169 
1170   if (!csorted) {
1171     if (isrow == iscol) {
1172       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1173       iscol_s = isrow_s;
1174     } else {
1175       const PetscInt *idxs;
1176       PetscInt *idxs_sorted,i;
1177 
1178       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1179       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1180       for (i=0;i<csize;i++) {
1181         idxs_perm_c[i] = i;
1182       }
1183       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1184       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1185       for (i=0;i<csize;i++) {
1186         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1187       }
1188       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1189       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1190     }
1191   } else {
1192     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1193     iscol_s = iscol;
1194   }
1195 
1196   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1197 
1198   if (!rsorted || !csorted) {
1199     Mat      new_mat;
1200     IS       is_perm_r,is_perm_c;
1201 
1202     if (!rsorted) {
1203       PetscInt *idxs_r,i;
1204       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1205       for (i=0;i<rsize;i++) {
1206         idxs_r[idxs_perm_r[i]] = i;
1207       }
1208       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1209       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1210     } else {
1211       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1212     }
1213     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1214 
1215     if (!csorted) {
1216       if (isrow_s == iscol_s) {
1217         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1218         is_perm_c = is_perm_r;
1219       } else {
1220         PetscInt *idxs_c,i;
1221         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1222         for (i=0;i<csize;i++) {
1223           idxs_c[idxs_perm_c[i]] = i;
1224         }
1225         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1226         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1227       }
1228     } else {
1229       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1230     }
1231     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1232 
1233     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1234     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1235     work_mat[0] = new_mat;
1236     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1237     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1238   }
1239 
1240   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1241   *B = work_mat[0];
1242   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1243   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1244   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1250 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1251 {
1252   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1253   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1254   Mat            new_mat;
1255   IS             is_local,is_global;
1256   PetscInt       local_size;
1257   PetscBool      isseqaij;
1258   PetscErrorCode ierr;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1262   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1263   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1264   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1265   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1266   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1267   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1268 
1269   /* check */
1270   if (pcbddc->dbg_flag) {
1271     Vec       x,x_change;
1272     PetscReal error;
1273 
1274     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1275     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1276     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1277     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1280     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1281     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1282     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1283     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1286     ierr = VecDestroy(&x);CHKERRQ(ierr);
1287     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1288   }
1289 
1290   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1291   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1292   if (isseqaij) {
1293     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1294   } else {
1295     Mat work_mat;
1296     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1297     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1298     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1299   }
1300   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1301 #if !defined(PETSC_USE_COMPLEX)
1302   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1303 #endif
1304   /*
1305   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1306   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1307   */
1308   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1314 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1315 {
1316   PC_IS*          pcis = (PC_IS*)(pc->data);
1317   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1318   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1319   PetscInt        *idx_R_local=NULL;
1320   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1321   PetscInt        vbs,bs;
1322   PetscBT         bitmask=NULL;
1323   PetscErrorCode  ierr;
1324 
1325   PetscFunctionBegin;
1326   /*
1327     No need to setup local scatters if
1328       - primal space is unchanged
1329         AND
1330       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1331         AND
1332       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1333   */
1334   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1335     PetscFunctionReturn(0);
1336   }
1337   /* destroy old objects */
1338   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1339   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1340   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1341   /* Set Non-overlapping dimensions */
1342   n_B = pcis->n_B;
1343   n_D = pcis->n - n_B;
1344   n_vertices = pcbddc->n_vertices;
1345 
1346   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1347 
1348   /* create auxiliary bitmask and allocate workspace */
1349   if (!sub_schurs->reuse_mumps) {
1350     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1351     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1352     for (i=0;i<n_vertices;i++) {
1353       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1354     }
1355 
1356     for (i=0, n_R=0; i<pcis->n; i++) {
1357       if (!PetscBTLookup(bitmask,i)) {
1358         idx_R_local[n_R++] = i;
1359       }
1360     }
1361   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1362     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1363 
1364     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1365     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1366   }
1367 
1368   /* Block code */
1369   vbs = 1;
1370   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1371   if (bs>1 && !(n_vertices%bs)) {
1372     PetscBool is_blocked = PETSC_TRUE;
1373     PetscInt  *vary;
1374     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1375     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1376     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1377     for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1378     for (i=0; i<n_vertices/bs; i++) {
1379       if (vary[i]!=0 && vary[i]!=bs) {
1380         is_blocked = PETSC_FALSE;
1381         break;
1382       }
1383     }
1384     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1385       vbs = bs;
1386       for (i=0;i<n_R/vbs;i++) {
1387         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1388       }
1389     }
1390     ierr = PetscFree(vary);CHKERRQ(ierr);
1391   }
1392   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1393   if (sub_schurs->reuse_mumps) {
1394     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1395 
1396     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1397     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1398     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1399     reuse_mumps->is_R = pcbddc->is_R_local;
1400   } else {
1401     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1402   }
1403 
1404   /* print some info if requested */
1405   if (pcbddc->dbg_flag) {
1406     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1407     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1408     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1409     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1410     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1411     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1412     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1413   }
1414 
1415   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1416   if (!sub_schurs->reuse_mumps) {
1417     IS       is_aux1,is_aux2;
1418     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1419 
1420     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1421     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1422     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1423     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1424     for (i=0; i<n_D; i++) {
1425       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1426     }
1427     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1428     for (i=0, j=0; i<n_R; i++) {
1429       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1430         aux_array1[j++] = i;
1431       }
1432     }
1433     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1434     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1435     for (i=0, j=0; i<n_B; i++) {
1436       if (!PetscBTLookup(bitmask,is_indices[i])) {
1437         aux_array2[j++] = i;
1438       }
1439     }
1440     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1441     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1442     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1443     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1444     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1445 
1446     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1447       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1448       for (i=0, j=0; i<n_R; i++) {
1449         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1450           aux_array1[j++] = i;
1451         }
1452       }
1453       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1454       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1455       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1456     }
1457     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1458     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1459   } else {
1460     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1461     IS               tis;
1462     PetscInt         schur_size;
1463 
1464     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1465     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1466     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1467     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1468     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1469       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1470       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1471       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1472     }
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1480 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1481 {
1482   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1483   PC_IS          *pcis = (PC_IS*)pc->data;
1484   PC             pc_temp;
1485   Mat            A_RR;
1486   MatReuse       reuse;
1487   PetscScalar    m_one = -1.0;
1488   PetscReal      value;
1489   PetscInt       n_D,n_R,ibs,mbs;
1490   PetscBool      use_exact,use_exact_reduced,issbaij;
1491   PetscErrorCode ierr;
1492   /* prefixes stuff */
1493   char           dir_prefix[256],neu_prefix[256],str_level[16];
1494   size_t         len;
1495 
1496   PetscFunctionBegin;
1497 
1498   /* compute prefixes */
1499   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1500   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1501   if (!pcbddc->current_level) {
1502     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1503     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1504     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1505     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1506   } else {
1507     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1508     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1509     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1510     len -= 15; /* remove "pc_bddc_coarse_" */
1511     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1512     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1513     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1514     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1515     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1516     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1517     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1518     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1519   }
1520 
1521   /* DIRICHLET PROBLEM */
1522   if (dirichlet) {
1523     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1524     if (pcbddc->issym) {
1525       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1526     }
1527     /* Matrix for Dirichlet problem is pcis->A_II */
1528     n_D = pcis->n - pcis->n_B;
1529     if (!pcbddc->ksp_D) { /* create object if not yet build */
1530       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1531       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1532       /* default */
1533       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1534       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1535       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1536       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1537       if (issbaij) {
1538         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1539       } else {
1540         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1541       }
1542       /* Allow user's customization */
1543       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1544       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1545     }
1546     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1547     if (sub_schurs->reuse_mumps) {
1548       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1549 
1550       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1551     }
1552     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1553     if (!n_D) {
1554       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1555       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1556     }
1557     /* Set Up KSP for Dirichlet problem of BDDC */
1558     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1559     /* set ksp_D into pcis data */
1560     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1561     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1562     pcis->ksp_D = pcbddc->ksp_D;
1563   }
1564 
1565   /* NEUMANN PROBLEM */
1566   A_RR = 0;
1567   if (neumann) {
1568     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1569     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1570     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1571     if (pcbddc->ksp_R) { /* already created ksp */
1572       PetscInt nn_R;
1573       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1574       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1575       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1576       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1577         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1578         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1579         reuse = MAT_INITIAL_MATRIX;
1580       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1581         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1582           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583           reuse = MAT_INITIAL_MATRIX;
1584         } else { /* safe to reuse the matrix */
1585           reuse = MAT_REUSE_MATRIX;
1586         }
1587       }
1588       /* last check */
1589       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1590         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1591         reuse = MAT_INITIAL_MATRIX;
1592       }
1593     } else { /* first time, so we need to create the matrix */
1594       reuse = MAT_INITIAL_MATRIX;
1595     }
1596     /* extract A_RR */
1597     if (!sub_schurs->reuse_mumps) {
1598       ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1599       ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1600       if (ibs != mbs) {
1601         Mat newmat;
1602         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1603         ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1604         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1605       } else {
1606         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1607       }
1608       if (pcbddc->issym) {
1609         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1610       }
1611     } else {
1612       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1613 
1614       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1615       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1616       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1617     }
1618     if (!pcbddc->ksp_R) { /* create object if not present */
1619       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1620       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1621       /* default */
1622       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1623       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1624       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1625       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1626       if (issbaij) {
1627         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1628       } else {
1629         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1630       }
1631       /* Allow user's customization */
1632       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1633       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1634     }
1635     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1636     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1637     if (!n_R) {
1638       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1639       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1640     }
1641     /* Reuse MUMPS solver if it is present */
1642     if (sub_schurs->reuse_mumps) {
1643       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1644 
1645       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1646     }
1647     /* Set Up KSP for Neumann problem of BDDC */
1648     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1649   }
1650   /* free Neumann problem's matrix */
1651   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1652 
1653   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1654   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1655     if (pcbddc->dbg_flag) {
1656       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1657       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1658       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1659     }
1660     if (dirichlet) { /* Dirichlet */
1661       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1662       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1663       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1664       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1665       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1666       /* need to be adapted? */
1667       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1668       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1669       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1670       /* print info */
1671       if (pcbddc->dbg_flag) {
1672         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1673         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1674       }
1675       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1676         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1677       }
1678     }
1679     if (neumann) { /* Neumann */
1680       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1681       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1682       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1683       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1684       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1685       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1686       /* need to be adapted? */
1687       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1688       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1689       /* print info */
1690       if (pcbddc->dbg_flag) {
1691         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1692         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1693       }
1694       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1695         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1696       }
1697     }
1698   }
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1704 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1705 {
1706   PetscErrorCode  ierr;
1707   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1708   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1709 
1710   PetscFunctionBegin;
1711   if (!sub_schurs->reuse_mumps) {
1712     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1713   }
1714   if (!pcbddc->switch_static) {
1715     if (applytranspose && pcbddc->local_auxmat1) {
1716       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1717       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1718     }
1719     if (!sub_schurs->reuse_mumps) {
1720       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1721       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1722     } else {
1723       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1724 
1725       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1727     }
1728   } else {
1729     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1730     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1731     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1732     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1733     if (applytranspose && pcbddc->local_auxmat1) {
1734       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1735       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1736       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1737       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1738     }
1739   }
1740   if (!sub_schurs->reuse_mumps) {
1741     if (applytranspose) {
1742       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1743     } else {
1744       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1745     }
1746 #if defined(PETSC_HAVE_MUMPS)
1747   } else {
1748     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1749 
1750     if (applytranspose) {
1751       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1752     } else {
1753       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1754     }
1755 #endif
1756   }
1757   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1758   if (!pcbddc->switch_static) {
1759     if (!sub_schurs->reuse_mumps) {
1760       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1761       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1762     } else {
1763       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1764 
1765       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1766       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1767     }
1768     if (!applytranspose && pcbddc->local_auxmat1) {
1769       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1770       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1771     }
1772   } else {
1773     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1774     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1775     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1776     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1777     if (!applytranspose && pcbddc->local_auxmat1) {
1778       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1779       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1780     }
1781     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1782     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1783     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1784     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1785   }
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1790 #undef __FUNCT__
1791 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1792 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1793 {
1794   PetscErrorCode ierr;
1795   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1796   PC_IS*            pcis = (PC_IS*)  (pc->data);
1797   const PetscScalar zero = 0.0;
1798 
1799   PetscFunctionBegin;
1800   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1801   if (applytranspose) {
1802     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1803     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1804   } else {
1805     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1806     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1807   }
1808   /* start communications from local primal nodes to rhs of coarse solver */
1809   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1810   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1811   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1812 
1813   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1814   /* TODO remove null space when doing multilevel */
1815   if (pcbddc->coarse_ksp) {
1816     Vec rhs,sol;
1817 
1818     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1819     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1820     if (applytranspose) {
1821       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1822     } else {
1823       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1824     }
1825   }
1826 
1827   /* Local solution on R nodes */
1828   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
1829     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
1830   }
1831 
1832   /* communications from coarse sol to local primal nodes */
1833   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1834   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1835 
1836   /* Sum contributions from two levels */
1837   if (applytranspose) {
1838     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1839     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1840   } else {
1841     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1842     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1849 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1850 {
1851   PetscErrorCode ierr;
1852   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1853   PetscScalar    *array;
1854   Vec            from,to;
1855 
1856   PetscFunctionBegin;
1857   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1858     from = pcbddc->coarse_vec;
1859     to = pcbddc->vec1_P;
1860     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1861       Vec tvec;
1862 
1863       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1864       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1865       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1866       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1867       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1868       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1869     }
1870   } else { /* from local to global -> put data in coarse right hand side */
1871     from = pcbddc->vec1_P;
1872     to = pcbddc->coarse_vec;
1873   }
1874   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1880 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1881 {
1882   PetscErrorCode ierr;
1883   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1884   PetscScalar    *array;
1885   Vec            from,to;
1886 
1887   PetscFunctionBegin;
1888   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1889     from = pcbddc->coarse_vec;
1890     to = pcbddc->vec1_P;
1891   } else { /* from local to global -> put data in coarse right hand side */
1892     from = pcbddc->vec1_P;
1893     to = pcbddc->coarse_vec;
1894   }
1895   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1896   if (smode == SCATTER_FORWARD) {
1897     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1898       Vec tvec;
1899 
1900       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1901       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1902       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1903       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1904     }
1905   } else {
1906     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1907      ierr = VecResetArray(from);CHKERRQ(ierr);
1908     }
1909   }
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 /* uncomment for testing purposes */
1914 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1915 #undef __FUNCT__
1916 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1917 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1918 {
1919   PetscErrorCode    ierr;
1920   PC_IS*            pcis = (PC_IS*)(pc->data);
1921   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1922   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1923   /* one and zero */
1924   PetscScalar       one=1.0,zero=0.0;
1925   /* space to store constraints and their local indices */
1926   PetscScalar       *constraints_data;
1927   PetscInt          *constraints_idxs,*constraints_idxs_B;
1928   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
1929   PetscInt          *constraints_n;
1930   /* iterators */
1931   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
1932   /* BLAS integers */
1933   PetscBLASInt      lwork,lierr;
1934   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1935   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1936   /* reuse */
1937   PetscInt          olocal_primal_size,olocal_primal_size_cc;
1938   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
1939   /* change of basis */
1940   PetscBool         qr_needed;
1941   PetscBT           change_basis,qr_needed_idx;
1942   /* auxiliary stuff */
1943   PetscInt          *nnz,*is_indices;
1944   PetscInt          ncc;
1945   /* some quantities */
1946   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1947   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1948 
1949   PetscFunctionBegin;
1950   /* Destroy Mat objects computed previously */
1951   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1952   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1953   /* save info on constraints from previous setup (if any) */
1954   olocal_primal_size = pcbddc->local_primal_size;
1955   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
1956   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
1957   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1958   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
1959   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
1960   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
1961 
1962   /* print some info */
1963   if (pcbddc->dbg_flag) {
1964     IS       vertices;
1965     PetscInt nv,nedges,nfaces;
1966     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1967     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1968     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1969     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1971     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1972     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1973     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1974     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1975   }
1976 
1977   if (!pcbddc->adaptive_selection) {
1978     IS           ISForVertices,*ISForFaces,*ISForEdges;
1979     MatNullSpace nearnullsp;
1980     const Vec    *nearnullvecs;
1981     Vec          *localnearnullsp;
1982     PetscScalar  *array;
1983     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1984     PetscBool    nnsp_has_cnst;
1985     /* LAPACK working arrays for SVD or POD */
1986     PetscBool    skip_lapack,boolforchange;
1987     PetscScalar  *work;
1988     PetscReal    *singular_vals;
1989 #if defined(PETSC_USE_COMPLEX)
1990     PetscReal    *rwork;
1991 #endif
1992 #if defined(PETSC_MISSING_LAPACK_GESVD)
1993     PetscScalar  *temp_basis,*correlation_mat;
1994 #else
1995     PetscBLASInt dummy_int=1;
1996     PetscScalar  dummy_scalar=1.;
1997 #endif
1998 
1999     /* Get index sets for faces, edges and vertices from graph */
2000     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2001     /* free unneeded index sets */
2002     if (!pcbddc->use_vertices) {
2003       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2004     }
2005     if (!pcbddc->use_edges) {
2006       for (i=0;i<n_ISForEdges;i++) {
2007         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2008       }
2009       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2010       n_ISForEdges = 0;
2011     }
2012     if (!pcbddc->use_faces) {
2013       for (i=0;i<n_ISForFaces;i++) {
2014         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2015       }
2016       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2017       n_ISForFaces = 0;
2018     }
2019 
2020 #if defined(PETSC_USE_DEBUG)
2021     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2022        Also use_change_of_basis should be consistent among processors */
2023     if (pcbddc->NullSpace) {
2024       PetscBool tbool[2],gbool[2];
2025 
2026       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2027         pcbddc->use_change_of_basis = PETSC_TRUE;
2028         if (!ISForEdges) {
2029           pcbddc->use_change_on_faces = PETSC_TRUE;
2030         }
2031       }
2032       tbool[0] = pcbddc->use_change_of_basis;
2033       tbool[1] = pcbddc->use_change_on_faces;
2034       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2035       pcbddc->use_change_of_basis = gbool[0];
2036       pcbddc->use_change_on_faces = gbool[1];
2037     }
2038 #endif
2039 
2040     /* check if near null space is attached to global mat */
2041     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2042     if (nearnullsp) {
2043       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2044       /* remove any stored info */
2045       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2046       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2047       /* store information for BDDC solver reuse */
2048       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2049       pcbddc->onearnullspace = nearnullsp;
2050       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2051       for (i=0;i<nnsp_size;i++) {
2052         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2053       }
2054     } else { /* if near null space is not provided BDDC uses constants by default */
2055       nnsp_size = 0;
2056       nnsp_has_cnst = PETSC_TRUE;
2057     }
2058     /* get max number of constraints on a single cc */
2059     max_constraints = nnsp_size;
2060     if (nnsp_has_cnst) max_constraints++;
2061 
2062     /*
2063          Evaluate maximum storage size needed by the procedure
2064          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2065          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2066          There can be multiple constraints per connected component
2067                                                                                                                                                            */
2068     n_vertices = 0;
2069     if (ISForVertices) {
2070       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2071     }
2072     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2073     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2074 
2075     total_counts = n_ISForFaces+n_ISForEdges;
2076     total_counts *= max_constraints;
2077     total_counts += n_vertices;
2078     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2079 
2080     total_counts = 0;
2081     max_size_of_constraint = 0;
2082     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2083       IS used_is;
2084       if (i<n_ISForEdges) {
2085         used_is = ISForEdges[i];
2086       } else {
2087         used_is = ISForFaces[i-n_ISForEdges];
2088       }
2089       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2090       total_counts += j;
2091       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2092     }
2093     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2094 
2095     /* get local part of global near null space vectors */
2096     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2097     for (k=0;k<nnsp_size;k++) {
2098       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2099       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2100       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2101     }
2102 
2103     /* whether or not to skip lapack calls */
2104     skip_lapack = PETSC_TRUE;
2105     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2106 
2107     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2108     if (!skip_lapack) {
2109       PetscScalar temp_work;
2110 
2111 #if defined(PETSC_MISSING_LAPACK_GESVD)
2112       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2113       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2114       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2115       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2116 #if defined(PETSC_USE_COMPLEX)
2117       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2118 #endif
2119       /* now we evaluate the optimal workspace using query with lwork=-1 */
2120       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2121       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2122       lwork = -1;
2123       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2124 #if !defined(PETSC_USE_COMPLEX)
2125       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2126 #else
2127       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2128 #endif
2129       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2130       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2131 #else /* on missing GESVD */
2132       /* SVD */
2133       PetscInt max_n,min_n;
2134       max_n = max_size_of_constraint;
2135       min_n = max_constraints;
2136       if (max_size_of_constraint < max_constraints) {
2137         min_n = max_size_of_constraint;
2138         max_n = max_constraints;
2139       }
2140       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2141 #if defined(PETSC_USE_COMPLEX)
2142       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2143 #endif
2144       /* now we evaluate the optimal workspace using query with lwork=-1 */
2145       lwork = -1;
2146       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2147       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2148       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2149       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2150 #if !defined(PETSC_USE_COMPLEX)
2151       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2152 #else
2153       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2154 #endif
2155       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2156       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2157 #endif /* on missing GESVD */
2158       /* Allocate optimal workspace */
2159       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2160       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2161     }
2162     /* Now we can loop on constraining sets */
2163     total_counts = 0;
2164     constraints_idxs_ptr[0] = 0;
2165     constraints_data_ptr[0] = 0;
2166     /* vertices */
2167     if (n_vertices) {
2168       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2169       if (nnsp_has_cnst) { /* it considers all possible vertices */
2170         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2171         for (i=0;i<n_vertices;i++) {
2172           constraints_n[total_counts] = 1;
2173           constraints_data[total_counts] = 1.0;
2174           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2175           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2176           total_counts++;
2177         }
2178       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2179         PetscBool used_vertex;
2180         for (i=0;i<n_vertices;i++) {
2181           used_vertex = PETSC_FALSE;
2182           k = 0;
2183           while (!used_vertex && k<nnsp_size) {
2184             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2185             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2186               constraints_n[total_counts] = 1;
2187               constraints_idxs[total_counts] = is_indices[i];
2188               constraints_data[total_counts] = 1.0;
2189               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2190               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2191               total_counts++;
2192               used_vertex = PETSC_TRUE;
2193             }
2194             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2195             k++;
2196           }
2197         }
2198       }
2199       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2200       n_vertices = total_counts;
2201     }
2202 
2203     /* edges and faces */
2204     total_counts_cc = total_counts;
2205     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2206       IS        used_is;
2207       PetscBool idxs_copied = PETSC_FALSE;
2208 
2209       if (ncc<n_ISForEdges) {
2210         used_is = ISForEdges[ncc];
2211         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2212       } else {
2213         used_is = ISForFaces[ncc-n_ISForEdges];
2214         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2215       }
2216       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2217 
2218       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2219       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2220       /* change of basis should not be performed on local periodic nodes */
2221       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2222       if (nnsp_has_cnst) {
2223         PetscScalar quad_value;
2224 
2225         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2226         idxs_copied = PETSC_TRUE;
2227 
2228         if (!pcbddc->use_nnsp_true) {
2229           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2230         } else {
2231           quad_value = 1.0;
2232         }
2233         for (j=0;j<size_of_constraint;j++) {
2234           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2235         }
2236         temp_constraints++;
2237         total_counts++;
2238       }
2239       for (k=0;k<nnsp_size;k++) {
2240         PetscReal real_value;
2241         PetscScalar *ptr_to_data;
2242 
2243         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2244         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2245         for (j=0;j<size_of_constraint;j++) {
2246           ptr_to_data[j] = array[is_indices[j]];
2247         }
2248         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2249         /* check if array is null on the connected component */
2250         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2251         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2252         if (real_value > 0.0) { /* keep indices and values */
2253           temp_constraints++;
2254           total_counts++;
2255           if (!idxs_copied) {
2256             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2257             idxs_copied = PETSC_TRUE;
2258           }
2259         }
2260       }
2261       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2262       valid_constraints = temp_constraints;
2263       if (!pcbddc->use_nnsp_true && temp_constraints) {
2264         if (temp_constraints == 1) { /* just normalize the constraint */
2265           PetscScalar norm,*ptr_to_data;
2266 
2267           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2268           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2269           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2270           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2271           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2272         } else { /* perform SVD */
2273           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2274           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2275 
2276 #if defined(PETSC_MISSING_LAPACK_GESVD)
2277           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2278              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2279              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2280                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2281                 from that computed using LAPACKgesvd
2282              -> This is due to a different computation of eigenvectors in LAPACKheev
2283              -> The quality of the POD-computed basis will be the same */
2284           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2285           /* Store upper triangular part of correlation matrix */
2286           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2287           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2288           for (j=0;j<temp_constraints;j++) {
2289             for (k=0;k<j+1;k++) {
2290               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2291             }
2292           }
2293           /* compute eigenvalues and eigenvectors of correlation matrix */
2294           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2295           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2296 #if !defined(PETSC_USE_COMPLEX)
2297           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2298 #else
2299           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2300 #endif
2301           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2302           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2303           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2304           j = 0;
2305           while (j < temp_constraints && singular_vals[j] < tol) j++;
2306           total_counts = total_counts-j;
2307           valid_constraints = temp_constraints-j;
2308           /* scale and copy POD basis into used quadrature memory */
2309           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2310           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2311           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2312           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2313           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2314           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2315           if (j<temp_constraints) {
2316             PetscInt ii;
2317             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2318             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2319             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2320             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2321             for (k=0;k<temp_constraints-j;k++) {
2322               for (ii=0;ii<size_of_constraint;ii++) {
2323                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2324               }
2325             }
2326           }
2327 #else  /* on missing GESVD */
2328           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2329           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2330           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2331           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2332 #if !defined(PETSC_USE_COMPLEX)
2333           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2334 #else
2335           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2336 #endif
2337           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2338           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2339           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2340           k = temp_constraints;
2341           if (k > size_of_constraint) k = size_of_constraint;
2342           j = 0;
2343           while (j < k && singular_vals[k-j-1] < tol) j++;
2344           valid_constraints = k-j;
2345           total_counts = total_counts-temp_constraints+valid_constraints;
2346 #endif /* on missing GESVD */
2347         }
2348       }
2349       /* update pointers information */
2350       if (valid_constraints) {
2351         constraints_n[total_counts_cc] = valid_constraints;
2352         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2353         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2354         /* set change_of_basis flag */
2355         if (boolforchange) {
2356           PetscBTSet(change_basis,total_counts_cc);
2357         }
2358         total_counts_cc++;
2359       }
2360     }
2361     /* free workspace */
2362     if (!skip_lapack) {
2363       ierr = PetscFree(work);CHKERRQ(ierr);
2364 #if defined(PETSC_USE_COMPLEX)
2365       ierr = PetscFree(rwork);CHKERRQ(ierr);
2366 #endif
2367       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2368 #if defined(PETSC_MISSING_LAPACK_GESVD)
2369       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2370       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2371 #endif
2372     }
2373     for (k=0;k<nnsp_size;k++) {
2374       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2375     }
2376     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2377     /* free index sets of faces, edges and vertices */
2378     for (i=0;i<n_ISForFaces;i++) {
2379       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2380     }
2381     if (n_ISForFaces) {
2382       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2383     }
2384     for (i=0;i<n_ISForEdges;i++) {
2385       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2386     }
2387     if (n_ISForEdges) {
2388       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2389     }
2390     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2391   } else {
2392     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2393 
2394     total_counts = 0;
2395     n_vertices = 0;
2396     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2397       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2398     }
2399     max_constraints = 0;
2400     total_counts_cc = 0;
2401     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2402       total_counts += pcbddc->adaptive_constraints_n[i];
2403       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2404       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2405     }
2406     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2407     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2408     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2409     constraints_data = pcbddc->adaptive_constraints_data;
2410     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2411     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2412     total_counts_cc = 0;
2413     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2414       if (pcbddc->adaptive_constraints_n[i]) {
2415         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2416       }
2417     }
2418 #if 0
2419     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2420     for (i=0;i<total_counts_cc;i++) {
2421       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2422       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2423       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2424         printf(" %d",constraints_idxs[j]);
2425       }
2426       printf("\n");
2427       printf("number of cc: %d\n",constraints_n[i]);
2428     }
2429     for (i=0;i<n_vertices;i++) {
2430       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2431     }
2432     for (i=0;i<sub_schurs->n_subs;i++) {
2433       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2434     }
2435 #endif
2436 
2437     max_size_of_constraint = 0;
2438     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2439     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2440     /* Change of basis */
2441     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2442     if (pcbddc->use_change_of_basis) {
2443       for (i=0;i<sub_schurs->n_subs;i++) {
2444         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2445           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2446         }
2447       }
2448     }
2449   }
2450   pcbddc->local_primal_size = total_counts;
2451   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2452 
2453   /* map constraints_idxs in boundary numbering */
2454   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2455   if (i != constraints_idxs_ptr[total_counts_cc]) {
2456     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2457   }
2458 
2459   /* Create constraint matrix */
2460   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2461   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2462   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2463 
2464   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2465   /* determine if a QR strategy is needed for change of basis */
2466   qr_needed = PETSC_FALSE;
2467   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2468   total_primal_vertices=0;
2469   pcbddc->local_primal_size_cc = 0;
2470   for (i=0;i<total_counts_cc;i++) {
2471     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2472     if (size_of_constraint == 1) {
2473       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2474       pcbddc->local_primal_size_cc += 1;
2475     } else if (PetscBTLookup(change_basis,i)) {
2476       for (k=0;k<constraints_n[i];k++) {
2477         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2478       }
2479       pcbddc->local_primal_size_cc += constraints_n[i];
2480       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2481         PetscBTSet(qr_needed_idx,i);
2482         qr_needed = PETSC_TRUE;
2483       }
2484     } else {
2485       pcbddc->local_primal_size_cc += 1;
2486     }
2487   }
2488   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2489   pcbddc->n_vertices = total_primal_vertices;
2490   /* permute indices in order to have a sorted set of vertices */
2491   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2492 
2493   ierr = PetscMalloc2(pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2494   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2495   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2496 
2497   /* nonzero structure of constraint matrix */
2498   /* and get reference dof for local constraints */
2499   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2500   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2501 
2502   j = total_primal_vertices;
2503   total_counts = total_primal_vertices;
2504   cum = total_primal_vertices;
2505   for (i=n_vertices;i<total_counts_cc;i++) {
2506     if (!PetscBTLookup(change_basis,i)) {
2507       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2508       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2509       cum++;
2510       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2511       for (k=0;k<constraints_n[i];k++) {
2512         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2513         nnz[j+k] = size_of_constraint;
2514       }
2515       j += constraints_n[i];
2516     }
2517   }
2518   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2519   ierr = PetscFree(nnz);CHKERRQ(ierr);
2520 
2521   /* set values in constraint matrix */
2522   for (i=0;i<total_primal_vertices;i++) {
2523     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2524   }
2525   total_counts = total_primal_vertices;
2526   for (i=n_vertices;i<total_counts_cc;i++) {
2527     if (!PetscBTLookup(change_basis,i)) {
2528       PetscInt *cols;
2529 
2530       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2531       cols = constraints_idxs+constraints_idxs_ptr[i];
2532       for (k=0;k<constraints_n[i];k++) {
2533         PetscInt    row = total_counts+k;
2534         PetscScalar *vals;
2535 
2536         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2537         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2538       }
2539       total_counts += constraints_n[i];
2540     }
2541   }
2542   /* assembling */
2543   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2544   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2545 
2546   /*
2547   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2548   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2549   */
2550   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2551   if (pcbddc->use_change_of_basis) {
2552     /* dual and primal dofs on a single cc */
2553     PetscInt     dual_dofs,primal_dofs;
2554     /* working stuff for GEQRF */
2555     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2556     PetscBLASInt lqr_work;
2557     /* working stuff for UNGQR */
2558     PetscScalar  *gqr_work,lgqr_work_t;
2559     PetscBLASInt lgqr_work;
2560     /* working stuff for TRTRS */
2561     PetscScalar  *trs_rhs;
2562     PetscBLASInt Blas_NRHS;
2563     /* pointers for values insertion into change of basis matrix */
2564     PetscInt     *start_rows,*start_cols;
2565     PetscScalar  *start_vals;
2566     /* working stuff for values insertion */
2567     PetscBT      is_primal;
2568     PetscInt     *aux_primal_numbering_B;
2569     /* matrix sizes */
2570     PetscInt     global_size,local_size;
2571     /* temporary change of basis */
2572     Mat          localChangeOfBasisMatrix;
2573     /* extra space for debugging */
2574     PetscScalar  *dbg_work;
2575 
2576     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2577     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2578     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2579     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2580     /* nonzeros for local mat */
2581     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2582     for (i=0;i<pcis->n;i++) nnz[i]=1;
2583     for (i=n_vertices;i<total_counts_cc;i++) {
2584       if (PetscBTLookup(change_basis,i)) {
2585         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2586         if (PetscBTLookup(qr_needed_idx,i)) {
2587           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2588         } else {
2589           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2590           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2591         }
2592       }
2593     }
2594     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2595     ierr = PetscFree(nnz);CHKERRQ(ierr);
2596     /* Set initial identity in the matrix */
2597     for (i=0;i<pcis->n;i++) {
2598       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2599     }
2600 
2601     if (pcbddc->dbg_flag) {
2602       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2603       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2604     }
2605 
2606 
2607     /* Now we loop on the constraints which need a change of basis */
2608     /*
2609        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2610        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2611 
2612        Basic blocks of change of basis matrix T computed by
2613 
2614           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2615 
2616             | 1        0   ...        0         s_1/S |
2617             | 0        1   ...        0         s_2/S |
2618             |              ...                        |
2619             | 0        ...            1     s_{n-1}/S |
2620             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2621 
2622             with S = \sum_{i=1}^n s_i^2
2623             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2624                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2625 
2626           - QR decomposition of constraints otherwise
2627     */
2628     if (qr_needed) {
2629       /* space to store Q */
2630       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2631       /* first we issue queries for optimal work */
2632       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2633       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2635       lqr_work = -1;
2636       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2637       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2638       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2639       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2640       lgqr_work = -1;
2641       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2642       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2643       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2644       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2645       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2646       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2647       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2648       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2649       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2650       /* array to store scaling factors for reflectors */
2651       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2652       /* array to store rhs and solution of triangular solver */
2653       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2654       /* allocating workspace for check */
2655       if (pcbddc->dbg_flag) {
2656         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2657       }
2658     }
2659     /* array to store whether a node is primal or not */
2660     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2661     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2662     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2663     if (i != total_primal_vertices) {
2664       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2665     }
2666     for (i=0;i<total_primal_vertices;i++) {
2667       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2668     }
2669     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2670 
2671     /* loop on constraints and see whether or not they need a change of basis and compute it */
2672     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2673       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2674       if (PetscBTLookup(change_basis,total_counts)) {
2675         /* get constraint info */
2676         primal_dofs = constraints_n[total_counts];
2677         dual_dofs = size_of_constraint-primal_dofs;
2678 
2679         if (pcbddc->dbg_flag) {
2680           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2681         }
2682 
2683         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2684 
2685           /* copy quadrature constraints for change of basis check */
2686           if (pcbddc->dbg_flag) {
2687             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2688           }
2689           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2690           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2691 
2692           /* compute QR decomposition of constraints */
2693           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2694           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2695           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2696           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2697           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2698           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2699           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2700 
2701           /* explictly compute R^-T */
2702           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2703           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2704           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2705           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2706           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2707           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2708           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2709           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2710           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2711           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2712 
2713           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2714           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2716           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2717           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2718           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2719           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2720           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2721           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2722 
2723           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2724              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2725              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2726           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2727           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2728           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2729           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2730           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2731           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2732           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2733           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2734           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2735           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2736 
2737           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2738           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2739           /* insert cols for primal dofs */
2740           for (j=0;j<primal_dofs;j++) {
2741             start_vals = &qr_basis[j*size_of_constraint];
2742             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2743             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2744           }
2745           /* insert cols for dual dofs */
2746           for (j=0,k=0;j<dual_dofs;k++) {
2747             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2748               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2749               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2750               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2751               j++;
2752             }
2753           }
2754 
2755           /* check change of basis */
2756           if (pcbddc->dbg_flag) {
2757             PetscInt   ii,jj;
2758             PetscBool valid_qr=PETSC_TRUE;
2759             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2760             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2764             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2765             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2766             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2767             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2768             for (jj=0;jj<size_of_constraint;jj++) {
2769               for (ii=0;ii<primal_dofs;ii++) {
2770                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2771                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2772               }
2773             }
2774             if (!valid_qr) {
2775               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2776               for (jj=0;jj<size_of_constraint;jj++) {
2777                 for (ii=0;ii<primal_dofs;ii++) {
2778                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2779                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2780                   }
2781                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2782                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2783                   }
2784                 }
2785               }
2786             } else {
2787               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2788             }
2789           }
2790         } else { /* simple transformation block */
2791           PetscInt    row,col;
2792           PetscScalar val,norm;
2793 
2794           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2795           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
2796           for (j=0;j<size_of_constraint;j++) {
2797             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
2798             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2799             if (!PetscBTLookup(is_primal,row_B)) {
2800               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
2801               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2802               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2803             } else {
2804               for (k=0;k<size_of_constraint;k++) {
2805                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2806                 if (row != col) {
2807                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
2808                 } else {
2809                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
2810                 }
2811                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2812               }
2813             }
2814           }
2815           if (pcbddc->dbg_flag) {
2816             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2817           }
2818         }
2819       } else {
2820         if (pcbddc->dbg_flag) {
2821           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
2822         }
2823       }
2824     }
2825 
2826     /* free workspace */
2827     if (qr_needed) {
2828       if (pcbddc->dbg_flag) {
2829         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2830       }
2831       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2832       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2833       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2834       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2835       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2836     }
2837     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2838     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840 
2841     /* assembling of global change of variable */
2842     {
2843       Mat      tmat;
2844       PetscInt bs;
2845 
2846       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2847       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2848       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2849       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2850       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2851       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2852       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2853       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2854       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2855       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2856       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2857       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2858       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2859       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2860       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2861       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2862       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2863       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2864     }
2865     /* check */
2866     if (pcbddc->dbg_flag) {
2867       PetscReal error;
2868       Vec       x,x_change;
2869 
2870       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2871       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2872       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2873       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2874       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2875       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2876       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2877       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2878       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2879       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2880       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2881       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2882       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2883       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2884       ierr = VecDestroy(&x);CHKERRQ(ierr);
2885       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2886     }
2887 
2888     /* adapt sub_schurs computed (if any) */
2889     if (pcbddc->use_deluxe_scaling) {
2890       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2891       if (sub_schurs->S_Ej_all) {
2892         Mat S_new,tmat;
2893         IS is_all_N;
2894 
2895         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2896         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2897         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2898         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2899         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2900         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2901         sub_schurs->S_Ej_all = S_new;
2902         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2903         if (sub_schurs->sum_S_Ej_all) {
2904           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2905           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2906           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2907           sub_schurs->sum_S_Ej_all = S_new;
2908           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2909         }
2910         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2911       }
2912     }
2913     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2914   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2915     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2916     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2917   }
2918 
2919   /* set up change of basis context */
2920   if (pcbddc->ChangeOfBasisMatrix) {
2921     PCBDDCChange_ctx change_ctx;
2922 
2923     if (!pcbddc->new_global_mat) {
2924       PetscInt global_size,local_size;
2925 
2926       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2927       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2928       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2929       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2930       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2931       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2932       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2933       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2934       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2935     } else {
2936       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2937       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2938       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2939     }
2940     if (!pcbddc->user_ChangeOfBasisMatrix) {
2941       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2942       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2943     } else {
2944       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2945       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2946     }
2947     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2948     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2949     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2950     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951   }
2952 
2953   /* check if a new primal space has been introduced */
2954   pcbddc->new_primal_space_local = PETSC_TRUE;
2955   if (olocal_primal_size == pcbddc->local_primal_size) {
2956     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2957     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2958     if (!pcbddc->new_primal_space_local) {
2959       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2960       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2961     }
2962   }
2963   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
2964   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2965   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2966 
2967   /* flush dbg viewer */
2968   if (pcbddc->dbg_flag) {
2969     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2970   }
2971 
2972   /* free workspace */
2973   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2974   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2975   if (!pcbddc->adaptive_selection) {
2976     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
2977     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
2978   } else {
2979     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
2980                       pcbddc->adaptive_constraints_idxs_ptr,
2981                       pcbddc->adaptive_constraints_data_ptr,
2982                       pcbddc->adaptive_constraints_idxs,
2983                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2984     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
2985     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
2986   }
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2992 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2993 {
2994   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2995   PC_IS       *pcis = (PC_IS*)pc->data;
2996   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2997   PetscInt    ierr,i,vertex_size,N;
2998   PetscViewer viewer=pcbddc->dbg_viewer;
2999 
3000   PetscFunctionBegin;
3001   /* Reset previously computed graph */
3002   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3003   /* Init local Graph struct */
3004   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3005   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
3006 
3007   /* Check validity of the csr graph passed in by the user */
3008   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3009     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3010   }
3011 
3012   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3013   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3014     PetscInt  *xadj,*adjncy;
3015     PetscInt  nvtxs;
3016     PetscBool flg_row=PETSC_FALSE;
3017 
3018     if (pcbddc->use_local_adj) {
3019 
3020       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3021       if (flg_row) {
3022         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3023         pcbddc->computed_rowadj = PETSC_TRUE;
3024       }
3025       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3026     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3027       IS                     is_dummy;
3028       ISLocalToGlobalMapping l2gmap_dummy;
3029       PetscInt               j,sum;
3030       PetscInt               *cxadj,*cadjncy;
3031       const PetscInt         *idxs;
3032       PCBDDCGraph            graph;
3033       PetscBT                is_on_boundary;
3034 
3035       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3036       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3037       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3038       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3039       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3040       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3041       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3042       if (flg_row) {
3043         graph->xadj = xadj;
3044         graph->adjncy = adjncy;
3045       }
3046       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3047       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3048       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3049 
3050       if (pcbddc->dbg_flag) {
3051         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3052         for (i=0;i<graph->ncc;i++) {
3053           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3054         }
3055       }
3056 
3057       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3058       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3059       for (i=0;i<pcis->n_B;i++) {
3060         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3061       }
3062       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3063 
3064       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3065       sum = 0;
3066       for (i=0;i<graph->ncc;i++) {
3067         PetscInt sizecc = 0;
3068         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3069           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3070             sizecc++;
3071           }
3072         }
3073         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3074           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3075             cxadj[graph->queue[j]] = sizecc;
3076           }
3077         }
3078         sum += sizecc*sizecc;
3079       }
3080       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3081       sum = 0;
3082       for (i=0;i<pcis->n;i++) {
3083         PetscInt temp = cxadj[i];
3084         cxadj[i] = sum;
3085         sum += temp;
3086       }
3087       cxadj[pcis->n] = sum;
3088       for (i=0;i<graph->ncc;i++) {
3089         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3090           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3091             PetscInt k,sizecc = 0;
3092             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3093               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3094                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3095                 sizecc++;
3096               }
3097             }
3098           }
3099         }
3100       }
3101       if (sum) {
3102         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3103       } else {
3104         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3105         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3106       }
3107       graph->xadj = 0;
3108       graph->adjncy = 0;
3109       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3110       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3111     }
3112   }
3113   if (pcbddc->dbg_flag) {
3114     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3115   }
3116 
3117   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3118   vertex_size = 1;
3119   if (pcbddc->user_provided_isfordofs) {
3120     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3121       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3122       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3123         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3124         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3125       }
3126       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3127       pcbddc->n_ISForDofs = 0;
3128       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3129     }
3130     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3131     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3132   } else {
3133     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3134       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3135       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3136       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3137         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3138       }
3139     }
3140   }
3141 
3142   /* Setup of Graph */
3143   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3144     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3145   }
3146   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3147     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3148   }
3149   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3150 
3151   /* Graph's connected components analysis */
3152   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3153 
3154   /* print some info to stdout */
3155   if (pcbddc->dbg_flag) {
3156     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3157   }
3158 
3159   /* mark topography has done */
3160   pcbddc->recompute_topography = PETSC_FALSE;
3161   PetscFunctionReturn(0);
3162 }
3163 
3164 #undef __FUNCT__
3165 #define __FUNCT__ "PCBDDCSubsetNumbering"
3166 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3167 {
3168   Vec            local_vec,global_vec;
3169   IS             seqis,paris;
3170   VecScatter     scatter_ctx;
3171   PetscScalar    *array;
3172   PetscInt       *temp_global_dofs;
3173   PetscScalar    globalsum;
3174   PetscInt       i,j,s;
3175   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3176   PetscMPIInt    rank_prec_comm,size_prec_comm;
3177   PetscInt       *dof_sizes,*dof_displs;
3178   PetscBool      first_found;
3179   PetscErrorCode ierr;
3180 
3181   PetscFunctionBegin;
3182   /* mpi buffers */
3183   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3184   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3185   j = ( !rank_prec_comm ? size_prec_comm : 0);
3186   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3187   /* get maximum size of subset */
3188   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3189   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3190   max_local = 0;
3191   for (i=0;i<n_local_dofs;i++) {
3192     if (max_local < temp_global_dofs[i] ) {
3193       max_local = temp_global_dofs[i];
3194     }
3195   }
3196   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3197   max_global++;
3198   max_local = 0;
3199   for (i=0;i<n_local_dofs;i++) {
3200     if (max_local < local_dofs[i] ) {
3201       max_local = local_dofs[i];
3202     }
3203   }
3204   max_local++;
3205   /* allocate workspace */
3206   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3207   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3208   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3209   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3210   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3211   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3212   /* create scatter */
3213   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3214   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3215   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3216   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3217   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3218   /* init array */
3219   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3220   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3221   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3222   if (local_dofs_mult) {
3223     for (i=0;i<n_local_dofs;i++) {
3224       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3225     }
3226   } else {
3227     for (i=0;i<n_local_dofs;i++) {
3228       array[local_dofs[i]]=1.0;
3229     }
3230   }
3231   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3232   /* scatter into global vec and get total number of global dofs */
3233   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3236   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3237   /* Fill global_vec with cumulative function for global numbering */
3238   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3239   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3240   nlocals = 0;
3241   first_index = -1;
3242   first_found = PETSC_FALSE;
3243   for (i=0;i<s;i++) {
3244     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3245       first_found = PETSC_TRUE;
3246       first_index = i;
3247     }
3248     nlocals += (PetscInt)PetscRealPart(array[i]);
3249   }
3250   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3251   if (!rank_prec_comm) {
3252     dof_displs[0]=0;
3253     for (i=1;i<size_prec_comm;i++) {
3254       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3255     }
3256   }
3257   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3258   if (first_found) {
3259     array[first_index] += (PetscScalar)nlocals;
3260     old_index = first_index;
3261     for (i=first_index+1;i<s;i++) {
3262       if (PetscRealPart(array[i]) > 0.1) {
3263         array[i] += array[old_index];
3264         old_index = i;
3265       }
3266     }
3267   }
3268   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3269   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3270   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3271   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3272   /* get global ordering of local dofs */
3273   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3274   if (local_dofs_mult) {
3275     for (i=0;i<n_local_dofs;i++) {
3276       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3277     }
3278   } else {
3279     for (i=0;i<n_local_dofs;i++) {
3280       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3281     }
3282   }
3283   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3284   /* free workspace */
3285   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3286   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3287   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3288   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3289   /* return pointer to global ordering of local dofs */
3290   *global_numbering_subset = temp_global_dofs;
3291   PetscFunctionReturn(0);
3292 }
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3296 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3297 {
3298   PetscInt       i,j;
3299   PetscScalar    *alphas;
3300   PetscErrorCode ierr;
3301 
3302   PetscFunctionBegin;
3303   /* this implements stabilized Gram-Schmidt */
3304   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3305   for (i=0;i<n;i++) {
3306     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3307     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3308     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3309   }
3310   ierr = PetscFree(alphas);CHKERRQ(ierr);
3311   PetscFunctionReturn(0);
3312 }
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatISGetSubassemblingPattern"
3316 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3317 {
3318   Mat             subdomain_adj;
3319   IS              new_ranks,ranks_send_to;
3320   MatPartitioning partitioner;
3321   Mat_IS          *matis;
3322   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3323   PetscInt        prank;
3324   PetscMPIInt     size,rank,color;
3325   PetscInt        *xadj,*adjncy,*oldranks;
3326   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3327   PetscInt        i,local_size,threshold=0;
3328   PetscErrorCode  ierr;
3329   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3330   PetscSubcomm    subcomm;
3331 
3332   PetscFunctionBegin;
3333   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3334   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3335   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3336 
3337   /* Get info on mapping */
3338   matis = (Mat_IS*)(mat->data);
3339   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3340   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3341 
3342   /* build local CSR graph of subdomains' connectivity */
3343   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3344   xadj[0] = 0;
3345   xadj[1] = PetscMax(n_neighs-1,0);
3346   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3347   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3348 
3349   if (threshold) {
3350     PetscInt xadj_count = 0;
3351     for (i=1;i<n_neighs;i++) {
3352       if (n_shared[i] > threshold) {
3353         adjncy[xadj_count] = neighs[i];
3354         adjncy_wgt[xadj_count] = n_shared[i];
3355         xadj_count++;
3356       }
3357     }
3358     xadj[1] = xadj_count;
3359   } else {
3360     if (xadj[1]) {
3361       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3362       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3363     }
3364   }
3365   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3366   if (use_square) {
3367     for (i=0;i<xadj[1];i++) {
3368       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3369     }
3370   }
3371   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3372 
3373   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3374 
3375   /*
3376     Restrict work on active processes only.
3377   */
3378   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3379   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3380   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3381   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3382   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3383   if (color) {
3384     ierr = PetscFree(xadj);CHKERRQ(ierr);
3385     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3386     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3387   } else {
3388     PetscInt coarsening_ratio;
3389     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3390     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3391     prank = rank;
3392     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3393     /*
3394     for (i=0;i<size;i++) {
3395       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3396     }
3397     */
3398     for (i=0;i<xadj[1];i++) {
3399       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3400     }
3401     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3402     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3403     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3404 
3405     /* Partition */
3406     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3407     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3408     if (use_vwgt) {
3409       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3410       v_wgt[0] = local_size;
3411       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3412     }
3413     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3414     coarsening_ratio = size/n_subdomains;
3415     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3416     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3417     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3418     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3419 
3420     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3421     if (contiguous) {
3422       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3423     } else {
3424       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3425     }
3426     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3427     /* clean up */
3428     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3429     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3430     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3431     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3432   }
3433   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3434 
3435   /* assemble parallel IS for sends */
3436   i = 1;
3437   if (color) i=0;
3438   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3439 
3440   /* get back IS */
3441   *is_sends = ranks_send_to;
3442   PetscFunctionReturn(0);
3443 }
3444 
3445 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3446 
3447 #undef __FUNCT__
3448 #define __FUNCT__ "MatISSubassemble"
3449 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3450 {
3451   Mat                    local_mat;
3452   Mat_IS                 *matis;
3453   IS                     is_sends_internal;
3454   PetscInt               rows,cols,new_local_rows;
3455   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3456   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3457   ISLocalToGlobalMapping l2gmap;
3458   PetscInt*              l2gmap_indices;
3459   const PetscInt*        is_indices;
3460   MatType                new_local_type;
3461   /* buffers */
3462   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3463   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3464   PetscInt               *recv_buffer_idxs_local;
3465   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3466   /* MPI */
3467   MPI_Comm               comm,comm_n;
3468   PetscSubcomm           subcomm;
3469   PetscMPIInt            n_sends,n_recvs,commsize;
3470   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3471   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3472   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3473   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3474   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3475   PetscErrorCode         ierr;
3476 
3477   PetscFunctionBegin;
3478   /* TODO: add missing checks */
3479   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3480   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3481   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3482   PetscValidLogicalCollectiveInt(mat,nis,7);
3483   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3484   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3485   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3486   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3487   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3488   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3489   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3490   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3491     PetscInt mrows,mcols,mnrows,mncols;
3492     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3493     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3494     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3495     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3496     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3497     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3498   }
3499   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3500   PetscValidLogicalCollectiveInt(mat,bs,0);
3501   /* prepare IS for sending if not provided */
3502   if (!is_sends) {
3503     PetscBool pcontig = PETSC_TRUE;
3504     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3505     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3506   } else {
3507     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3508     is_sends_internal = is_sends;
3509   }
3510 
3511   /* get pointer of MATIS data */
3512   matis = (Mat_IS*)mat->data;
3513 
3514   /* get comm */
3515   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3516 
3517   /* compute number of sends */
3518   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3519   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3520 
3521   /* compute number of receives */
3522   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3523   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3524   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3525   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3526   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3527   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3528   ierr = PetscFree(iflags);CHKERRQ(ierr);
3529 
3530   /* restrict comm if requested */
3531   subcomm = 0;
3532   destroy_mat = PETSC_FALSE;
3533   if (restrict_comm) {
3534     PetscMPIInt color,subcommsize;
3535 
3536     color = 0;
3537     if (!n_recvs && n_sends) color = 1; /* processes sending and not receiving anything will not partecipate in new comm */
3538     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3539     subcommsize = commsize - subcommsize;
3540     /* check if reuse has been requested */
3541     if (reuse == MAT_REUSE_MATRIX) {
3542       if (*mat_n) {
3543         PetscMPIInt subcommsize2;
3544         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3545         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3546         comm_n = PetscObjectComm((PetscObject)*mat_n);
3547       } else {
3548         comm_n = PETSC_COMM_SELF;
3549       }
3550     } else { /* MAT_INITIAL_MATRIX */
3551       PetscMPIInt rank;
3552 
3553       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3554       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3555       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3556       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3557       comm_n = PetscSubcommChild(subcomm);
3558     }
3559     /* flag to destroy *mat_n if not significative */
3560     if (color) destroy_mat = PETSC_TRUE;
3561   } else {
3562     comm_n = comm;
3563   }
3564 
3565   /* prepare send/receive buffers */
3566   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3567   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3568   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3569   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3570   if (nis) {
3571     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3572   }
3573 
3574   /* Get data from local matrices */
3575   if (!isdense) {
3576     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3577     /* TODO: See below some guidelines on how to prepare the local buffers */
3578     /*
3579        send_buffer_vals should contain the raw values of the local matrix
3580        send_buffer_idxs should contain:
3581        - MatType_PRIVATE type
3582        - PetscInt        size_of_l2gmap
3583        - PetscInt        global_row_indices[size_of_l2gmap]
3584        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3585     */
3586   } else {
3587     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3588     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3589     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3590     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3591     send_buffer_idxs[1] = i;
3592     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3593     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3594     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3595     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3596     for (i=0;i<n_sends;i++) {
3597       ilengths_vals[is_indices[i]] = len*len;
3598       ilengths_idxs[is_indices[i]] = len+2;
3599     }
3600   }
3601   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3602   /* additional is (if any) */
3603   if (nis) {
3604     PetscMPIInt psum;
3605     PetscInt j;
3606     for (j=0,psum=0;j<nis;j++) {
3607       PetscInt plen;
3608       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3609       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3610       psum += len+1; /* indices + lenght */
3611     }
3612     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3613     for (j=0,psum=0;j<nis;j++) {
3614       PetscInt plen;
3615       const PetscInt *is_array_idxs;
3616       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3617       send_buffer_idxs_is[psum] = plen;
3618       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3619       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3620       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3621       psum += plen+1; /* indices + lenght */
3622     }
3623     for (i=0;i<n_sends;i++) {
3624       ilengths_idxs_is[is_indices[i]] = psum;
3625     }
3626     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3627   }
3628 
3629   buf_size_idxs = 0;
3630   buf_size_vals = 0;
3631   buf_size_idxs_is = 0;
3632   for (i=0;i<n_recvs;i++) {
3633     buf_size_idxs += (PetscInt)olengths_idxs[i];
3634     buf_size_vals += (PetscInt)olengths_vals[i];
3635     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3636   }
3637   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3638   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3639   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3640 
3641   /* get new tags for clean communications */
3642   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3643   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3644   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3645 
3646   /* allocate for requests */
3647   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3648   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3649   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3650   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3651   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3652   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3653 
3654   /* communications */
3655   ptr_idxs = recv_buffer_idxs;
3656   ptr_vals = recv_buffer_vals;
3657   ptr_idxs_is = recv_buffer_idxs_is;
3658   for (i=0;i<n_recvs;i++) {
3659     source_dest = onodes[i];
3660     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3661     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3662     ptr_idxs += olengths_idxs[i];
3663     ptr_vals += olengths_vals[i];
3664     if (nis) {
3665       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3666       ptr_idxs_is += olengths_idxs_is[i];
3667     }
3668   }
3669   for (i=0;i<n_sends;i++) {
3670     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3671     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3672     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3673     if (nis) {
3674       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3675     }
3676   }
3677   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3678   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3679 
3680   /* assemble new l2g map */
3681   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3682   ptr_idxs = recv_buffer_idxs;
3683   new_local_rows = 0;
3684   for (i=0;i<n_recvs;i++) {
3685     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3686     ptr_idxs += olengths_idxs[i];
3687   }
3688   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3689   ptr_idxs = recv_buffer_idxs;
3690   new_local_rows = 0;
3691   for (i=0;i<n_recvs;i++) {
3692     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3693     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3694     ptr_idxs += olengths_idxs[i];
3695   }
3696   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3697   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3698   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3699 
3700   /* infer new local matrix type from received local matrices type */
3701   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3702   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3703   if (n_recvs) {
3704     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3705     ptr_idxs = recv_buffer_idxs;
3706     for (i=0;i<n_recvs;i++) {
3707       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3708         new_local_type_private = MATAIJ_PRIVATE;
3709         break;
3710       }
3711       ptr_idxs += olengths_idxs[i];
3712     }
3713     switch (new_local_type_private) {
3714       case MATDENSE_PRIVATE:
3715         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3716           new_local_type = MATSEQAIJ;
3717           bs = 1;
3718         } else { /* if I receive only 1 dense matrix */
3719           new_local_type = MATSEQDENSE;
3720           bs = 1;
3721         }
3722         break;
3723       case MATAIJ_PRIVATE:
3724         new_local_type = MATSEQAIJ;
3725         bs = 1;
3726         break;
3727       case MATBAIJ_PRIVATE:
3728         new_local_type = MATSEQBAIJ;
3729         break;
3730       case MATSBAIJ_PRIVATE:
3731         new_local_type = MATSEQSBAIJ;
3732         break;
3733       default:
3734         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3735         break;
3736     }
3737   } else { /* by default, new_local_type is seqdense */
3738     new_local_type = MATSEQDENSE;
3739     bs = 1;
3740   }
3741 
3742   /* create MATIS object if needed */
3743   if (reuse == MAT_INITIAL_MATRIX) {
3744     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3745     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3746   } else {
3747     /* it also destroys the local matrices */
3748     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3749   }
3750   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3751   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3752 
3753   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3754 
3755   /* Global to local map of received indices */
3756   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3757   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3758   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3759 
3760   /* restore attributes -> type of incoming data and its size */
3761   buf_size_idxs = 0;
3762   for (i=0;i<n_recvs;i++) {
3763     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3764     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3765     buf_size_idxs += (PetscInt)olengths_idxs[i];
3766   }
3767   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3768 
3769   /* set preallocation */
3770   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3771   if (!newisdense) {
3772     PetscInt *new_local_nnz=0;
3773 
3774     ptr_vals = recv_buffer_vals;
3775     ptr_idxs = recv_buffer_idxs_local;
3776     if (n_recvs) {
3777       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3778     }
3779     for (i=0;i<n_recvs;i++) {
3780       PetscInt j;
3781       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3782         for (j=0;j<*(ptr_idxs+1);j++) {
3783           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3784         }
3785       } else {
3786         /* TODO */
3787       }
3788       ptr_idxs += olengths_idxs[i];
3789     }
3790     if (new_local_nnz) {
3791       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3792       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3793       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3794       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3795       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3796       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3797     } else {
3798       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3799     }
3800     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3801   } else {
3802     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3803   }
3804 
3805   /* set values */
3806   ptr_vals = recv_buffer_vals;
3807   ptr_idxs = recv_buffer_idxs_local;
3808   for (i=0;i<n_recvs;i++) {
3809     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3810       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3811       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3812       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3813       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3814       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3815     } else {
3816       /* TODO */
3817     }
3818     ptr_idxs += olengths_idxs[i];
3819     ptr_vals += olengths_vals[i];
3820   }
3821   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3822   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3823   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3824   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3825   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3826   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3827 
3828 #if 0
3829   if (!restrict_comm) { /* check */
3830     Vec       lvec,rvec;
3831     PetscReal infty_error;
3832 
3833     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3834     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3835     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3836     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3837     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3838     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3839     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3840     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3841     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3842   }
3843 #endif
3844 
3845   /* assemble new additional is (if any) */
3846   if (nis) {
3847     PetscInt **temp_idxs,*count_is,j,psum;
3848 
3849     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3850     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3851     ptr_idxs = recv_buffer_idxs_is;
3852     psum = 0;
3853     for (i=0;i<n_recvs;i++) {
3854       for (j=0;j<nis;j++) {
3855         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3856         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3857         psum += plen;
3858         ptr_idxs += plen+1; /* shift pointer to received data */
3859       }
3860     }
3861     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3862     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3863     for (i=1;i<nis;i++) {
3864       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3865     }
3866     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3867     ptr_idxs = recv_buffer_idxs_is;
3868     for (i=0;i<n_recvs;i++) {
3869       for (j=0;j<nis;j++) {
3870         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3871         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3872         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3873         ptr_idxs += plen+1; /* shift pointer to received data */
3874       }
3875     }
3876     for (i=0;i<nis;i++) {
3877       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3878       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3879       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3880     }
3881     ierr = PetscFree(count_is);CHKERRQ(ierr);
3882     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3883     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3884   }
3885   /* free workspace */
3886   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3887   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3888   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3889   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3890   if (isdense) {
3891     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3892     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3893   } else {
3894     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3895   }
3896   if (nis) {
3897     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3898     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3899   }
3900   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3901   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3902   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3903   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3904   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3905   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3906   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3907   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3908   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3909   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3910   ierr = PetscFree(onodes);CHKERRQ(ierr);
3911   if (nis) {
3912     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3913     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3914     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3915   }
3916   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3917   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3918     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3919     for (i=0;i<nis;i++) {
3920       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3921     }
3922   }
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 /* temporary hack into ksp private data structure */
3927 #include <petsc/private/kspimpl.h>
3928 
3929 #undef __FUNCT__
3930 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3931 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3932 {
3933   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3934   PC_IS                  *pcis = (PC_IS*)pc->data;
3935   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3936   MatNullSpace           CoarseNullSpace=NULL;
3937   ISLocalToGlobalMapping coarse_islg;
3938   IS                     coarse_is,*isarray;
3939   PetscInt               i,im_active=-1,active_procs=-1;
3940   PetscInt               nis,nisdofs,nisneu;
3941   PC                     pc_temp;
3942   PCType                 coarse_pc_type;
3943   KSPType                coarse_ksp_type;
3944   PetscBool              multilevel_requested,multilevel_allowed;
3945   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3946   Mat                    t_coarse_mat_is;
3947   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3948   PetscMPIInt            all_procs;
3949   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3950   PetscBool              compute_vecs = PETSC_FALSE;
3951   PetscScalar            *array;
3952   PetscErrorCode         ierr;
3953 
3954   PetscFunctionBegin;
3955   /* Assign global numbering to coarse dofs */
3956   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3957     PetscInt ocoarse_size;
3958     compute_vecs = PETSC_TRUE;
3959     ocoarse_size = pcbddc->coarse_size;
3960     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3961     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3962     /* see if we can avoid some work */
3963     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3964       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3965         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3966         coarse_reuse = PETSC_FALSE;
3967       } else { /* we can safely reuse already computed coarse matrix */
3968         coarse_reuse = PETSC_TRUE;
3969       }
3970     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3971       coarse_reuse = PETSC_FALSE;
3972     }
3973     /* reset any subassembling information */
3974     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3975     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3976   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3977     coarse_reuse = PETSC_TRUE;
3978   }
3979 
3980   /* count "active" (i.e. with positive local size) and "void" processes */
3981   im_active = !!(pcis->n);
3982   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3983   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3984   void_procs = all_procs-active_procs;
3985   csin_type_simple = PETSC_TRUE;
3986   redist = PETSC_FALSE;
3987   if (pcbddc->current_level && void_procs) {
3988     csin_ml = PETSC_TRUE;
3989     ncoarse_ml = void_procs;
3990     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3991     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3992       csin_ds = PETSC_TRUE;
3993       ncoarse_ds = pcbddc->redistribute_coarse;
3994       redist = PETSC_TRUE;
3995     } else {
3996       csin_ds = PETSC_TRUE;
3997       ncoarse_ds = active_procs;
3998       redist = PETSC_TRUE;
3999     }
4000   } else {
4001     csin_ml = PETSC_FALSE;
4002     ncoarse_ml = all_procs;
4003     if (void_procs) {
4004       csin_ds = PETSC_TRUE;
4005       ncoarse_ds = void_procs;
4006       csin_type_simple = PETSC_FALSE;
4007     } else {
4008       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4009         csin_ds = PETSC_TRUE;
4010         ncoarse_ds = pcbddc->redistribute_coarse;
4011         redist = PETSC_TRUE;
4012       } else {
4013         csin_ds = PETSC_FALSE;
4014         ncoarse_ds = all_procs;
4015       }
4016     }
4017   }
4018 
4019   /*
4020     test if we can go multilevel: three conditions must be satisfied:
4021     - we have not exceeded the number of levels requested
4022     - we can actually subassemble the active processes
4023     - we can find a suitable number of MPI processes where we can place the subassembled problem
4024   */
4025   multilevel_allowed = PETSC_FALSE;
4026   multilevel_requested = PETSC_FALSE;
4027   if (pcbddc->current_level < pcbddc->max_levels) {
4028     multilevel_requested = PETSC_TRUE;
4029     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4030       multilevel_allowed = PETSC_FALSE;
4031     } else {
4032       multilevel_allowed = PETSC_TRUE;
4033     }
4034   }
4035   /* determine number of process partecipating to coarse solver */
4036   if (multilevel_allowed) {
4037     ncoarse = ncoarse_ml;
4038     csin = csin_ml;
4039     redist = PETSC_FALSE;
4040   } else {
4041     ncoarse = ncoarse_ds;
4042     csin = csin_ds;
4043   }
4044 
4045   /* creates temporary l2gmap and IS for coarse indexes */
4046   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4047   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4048 
4049   /* creates temporary MATIS object for coarse matrix */
4050   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4051   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4052   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4053   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4054 #if 0
4055   {
4056     PetscViewer viewer;
4057     char filename[256];
4058     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4059     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4060     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4061     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4062     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4063   }
4064 #endif
4065   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4066   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4067   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4068   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4069   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4070 
4071   /* compute dofs splitting and neumann boundaries for coarse dofs */
4072   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4073     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4074     const PetscInt         *idxs;
4075     ISLocalToGlobalMapping tmap;
4076 
4077     /* create map between primal indices (in local representative ordering) and local primal numbering */
4078     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4079     /* allocate space for temporary storage */
4080     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4081     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4082     /* allocate for IS array */
4083     nisdofs = pcbddc->n_ISForDofsLocal;
4084     nisneu = !!pcbddc->NeumannBoundariesLocal;
4085     nis = nisdofs + nisneu;
4086     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4087     /* dofs splitting */
4088     for (i=0;i<nisdofs;i++) {
4089       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4090       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4091       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4092       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4093       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4094       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4095       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4096       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4097     }
4098     /* neumann boundaries */
4099     if (pcbddc->NeumannBoundariesLocal) {
4100       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4101       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4102       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4103       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4104       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4105       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4106       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4107       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4108     }
4109     /* free memory */
4110     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4111     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4112     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4113   } else {
4114     nis = 0;
4115     nisdofs = 0;
4116     nisneu = 0;
4117     isarray = NULL;
4118   }
4119   /* destroy no longer needed map */
4120   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4121 
4122   /* restrict on coarse candidates (if needed) */
4123   coarse_mat_is = NULL;
4124   if (csin) {
4125     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4126       if (redist) {
4127         PetscMPIInt rank;
4128         PetscInt    spc,n_spc_p1,dest[1],destsize;
4129 
4130         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4131         spc = active_procs/ncoarse;
4132         n_spc_p1 = active_procs%ncoarse;
4133         if (im_active) {
4134           destsize = 1;
4135           if (rank > n_spc_p1*(spc+1)-1) {
4136             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4137           } else {
4138             dest[0] = rank/(spc+1);
4139           }
4140         } else {
4141           destsize = 0;
4142         }
4143         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4144       } else if (csin_type_simple) {
4145         PetscMPIInt rank;
4146         PetscInt    issize,isidx;
4147 
4148         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4149         if (im_active) {
4150           issize = 1;
4151           isidx = (PetscInt)rank;
4152         } else {
4153           issize = 0;
4154           isidx = -1;
4155         }
4156         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4157       } else { /* get a suitable subassembling pattern from MATIS code */
4158         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4159       }
4160 
4161       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4162       if (!redist || ncoarse <= void_procs) {
4163         PetscInt ncoarse_cand,tissize,*nisindices;
4164         PetscInt *coarse_candidates;
4165         const PetscInt* tisindices;
4166 
4167         /* get coarse candidates' ranks in pc communicator */
4168         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4169         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4170         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4171           if (!coarse_candidates[i]) {
4172             coarse_candidates[ncoarse_cand++]=i;
4173           }
4174         }
4175         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4176 
4177 
4178         if (pcbddc->dbg_flag) {
4179           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4180           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4181           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4182           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4183           for (i=0;i<ncoarse_cand;i++) {
4184             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4185           }
4186           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4187           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4188         }
4189         /* shift the pattern on coarse candidates */
4190         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4191         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4192         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4193         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4194         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4195         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4196         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4197       }
4198       if (pcbddc->dbg_flag) {
4199         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4200         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4201         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4202         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4203       }
4204     }
4205     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4206     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4207   } else {
4208     if (pcbddc->dbg_flag) {
4209       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4210       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4211       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4212     }
4213     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4214     coarse_mat_is = t_coarse_mat_is;
4215   }
4216 
4217   /* create local to global scatters for coarse problem */
4218   if (compute_vecs) {
4219     PetscInt lrows;
4220     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4221     if (coarse_mat_is) {
4222       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4223     } else {
4224       lrows = 0;
4225     }
4226     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4227     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4228     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4229     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4230     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4231   }
4232   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4233   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4234 
4235   /* set defaults for coarse KSP and PC */
4236   if (multilevel_allowed) {
4237     coarse_ksp_type = KSPRICHARDSON;
4238     coarse_pc_type = PCBDDC;
4239   } else {
4240     coarse_ksp_type = KSPPREONLY;
4241     coarse_pc_type = PCREDUNDANT;
4242   }
4243 
4244   /* print some info if requested */
4245   if (pcbddc->dbg_flag) {
4246     if (!multilevel_allowed) {
4247       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4248       if (multilevel_requested) {
4249         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4250       } else if (pcbddc->max_levels) {
4251         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4252       }
4253       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4254     }
4255   }
4256 
4257   /* create the coarse KSP object only once with defaults */
4258   if (coarse_mat_is) {
4259     MatReuse coarse_mat_reuse;
4260     PetscViewer dbg_viewer = NULL;
4261     if (pcbddc->dbg_flag) {
4262       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4263       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4264     }
4265     if (!pcbddc->coarse_ksp) {
4266       char prefix[256],str_level[16];
4267       size_t len;
4268       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4269       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4270       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4271       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4272       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4273       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4274       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4275       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4276       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4277       /* prefix */
4278       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4279       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4280       if (!pcbddc->current_level) {
4281         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4282         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4283       } else {
4284         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4285         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4286         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4287         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4288         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4289         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4290       }
4291       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4292       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4293       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4294       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4295       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4296       /* allow user customization */
4297       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4298     }
4299     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4300     if (nisdofs) {
4301       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4302       for (i=0;i<nisdofs;i++) {
4303         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4304       }
4305     }
4306     if (nisneu) {
4307       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4308       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4309     }
4310 
4311     /* get some info after set from options */
4312     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4313     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4314     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4315     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4316     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4317       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4318       isbddc = PETSC_FALSE;
4319     }
4320     if (isredundant) {
4321       KSP inner_ksp;
4322       PC inner_pc;
4323       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4324       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4325       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4326     }
4327 
4328     /* assemble coarse matrix */
4329     if (coarse_reuse) {
4330       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4331       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4332       coarse_mat_reuse = MAT_REUSE_MATRIX;
4333     } else {
4334       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4335     }
4336     if (isbddc || isnn) {
4337       if (pcbddc->coarsening_ratio > 1) {
4338         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4339           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4340           if (pcbddc->dbg_flag) {
4341             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4342             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4343             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4344             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4345           }
4346         }
4347         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4348       } else {
4349         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4350         coarse_mat = coarse_mat_is;
4351       }
4352     } else {
4353       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4354     }
4355     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4356 
4357     /* propagate symmetry info to coarse matrix */
4358     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4359     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4360 
4361     /* set operators */
4362     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4363     if (pcbddc->dbg_flag) {
4364       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4365     }
4366   } else { /* processes non partecipating to coarse solver (if any) */
4367     coarse_mat = 0;
4368   }
4369   ierr = PetscFree(isarray);CHKERRQ(ierr);
4370 #if 0
4371   {
4372     PetscViewer viewer;
4373     char filename[256];
4374     sprintf(filename,"coarse_mat.m");
4375     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4376     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4377     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4378     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4379   }
4380 #endif
4381 
4382   /* Compute coarse null space (special handling by BDDC only) */
4383   if (pcbddc->NullSpace) {
4384     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4385   }
4386 
4387   if (pcbddc->coarse_ksp) {
4388     Vec crhs,csol;
4389     PetscBool ispreonly;
4390     if (CoarseNullSpace) {
4391       if (isbddc) {
4392         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4393       } else {
4394         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4395       }
4396     }
4397     /* setup coarse ksp */
4398     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4399     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4400     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4401     /* hack */
4402     if (!csol) {
4403       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4404     }
4405     if (!crhs) {
4406       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4407     }
4408     /* Check coarse problem if in debug mode or if solving with an iterative method */
4409     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4410     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4411       KSP       check_ksp;
4412       KSPType   check_ksp_type;
4413       PC        check_pc;
4414       Vec       check_vec,coarse_vec;
4415       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4416       PetscInt  its;
4417       PetscBool compute_eigs;
4418       PetscReal *eigs_r,*eigs_c;
4419       PetscInt  neigs;
4420       const char *prefix;
4421 
4422       /* Create ksp object suitable for estimation of extreme eigenvalues */
4423       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4424       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4425       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4426       if (ispreonly) {
4427         check_ksp_type = KSPPREONLY;
4428         compute_eigs = PETSC_FALSE;
4429       } else {
4430         check_ksp_type = KSPGMRES;
4431         compute_eigs = PETSC_TRUE;
4432       }
4433       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4434       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4435       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4436       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4437       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4438       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4439       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4440       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4441       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4442       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4443       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4444       /* create random vec */
4445       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4446       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4447       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4448       if (CoarseNullSpace) {
4449         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4450       }
4451       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4452       /* solve coarse problem */
4453       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4454       if (CoarseNullSpace) {
4455         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4456       }
4457       /* set eigenvalue estimation if preonly has not been requested */
4458       if (compute_eigs) {
4459         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4460         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4461         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4462         lambda_max = eigs_r[neigs-1];
4463         lambda_min = eigs_r[0];
4464         if (pcbddc->use_coarse_estimates) {
4465           if (lambda_max>lambda_min) {
4466             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4467             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4468           }
4469         }
4470       }
4471 
4472       /* check coarse problem residual error */
4473       if (pcbddc->dbg_flag) {
4474         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4475         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4476         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4477         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4478         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4479         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4480         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4481         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4482         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4483         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4484         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4485         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4486         if (compute_eigs) {
4487           PetscReal lambda_max_s,lambda_min_s;
4488           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4489           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4490           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4491           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4492           for (i=0;i<neigs;i++) {
4493             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4494           }
4495         }
4496         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4497         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4498       }
4499       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4500       if (compute_eigs) {
4501         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4502         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4503       }
4504     }
4505   }
4506   /* print additional info */
4507   if (pcbddc->dbg_flag) {
4508     /* waits until all processes reaches this point */
4509     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4510     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4511     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4512   }
4513 
4514   /* free memory */
4515   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4516   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4517   PetscFunctionReturn(0);
4518 }
4519 
4520 #undef __FUNCT__
4521 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4522 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4523 {
4524   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4525   PC_IS*         pcis = (PC_IS*)pc->data;
4526   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4527   PetscInt       i,local_size,coarse_size=0;
4528   PetscInt       *local_primal_indices=NULL;
4529   PetscInt       *t_local_primal_indices=NULL;
4530   PetscErrorCode ierr;
4531 
4532   PetscFunctionBegin;
4533   /* Compute global number of coarse dofs */
4534   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4535     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4536   }
4537   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult,&coarse_size,&t_local_primal_indices);CHKERRQ(ierr);
4538   ierr = PetscMalloc1(pcbddc->local_primal_size,&local_primal_indices);CHKERRQ(ierr);
4539   local_size = 0;
4540   for (i=0;i<pcbddc->local_primal_size_cc;i++) {
4541     PetscInt j;
4542     for (j=0;j<pcbddc->local_primal_ref_mult[i];j++) local_primal_indices[local_size++] = t_local_primal_indices[i] + j;
4543   }
4544   ierr = PetscFree(t_local_primal_indices);CHKERRQ(ierr);
4545   if (local_size != pcbddc->local_primal_size) {
4546     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4547   }
4548 
4549   /* check numbering */
4550   if (pcbddc->dbg_flag) {
4551     PetscScalar coarsesum,*array;
4552     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4553 
4554     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4555     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4556     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4557     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4558     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4559     for (i=0;i<pcbddc->local_primal_size;i++) {
4560       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4561     }
4562     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4563     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4564     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4565     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4566     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4567     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4568     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4569     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4570     for (i=0;i<pcis->n;i++) {
4571       if (array[i] == 1.0) {
4572         set_error = PETSC_TRUE;
4573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4574       }
4575     }
4576     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4577     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4578     for (i=0;i<pcis->n;i++) {
4579       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4580     }
4581     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4582     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4583     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4584     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4585     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4586     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4587     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4588       PetscInt *gidxs;
4589 
4590       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4591       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4592       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4593       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4594       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4595       for (i=0;i<pcbddc->local_primal_size;i++) {
4596         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4597       }
4598       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4599       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4600     }
4601     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4602     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4603   }
4604   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4605   /* get back data */
4606   *coarse_size_n = coarse_size;
4607   *local_primal_indices_n = local_primal_indices;
4608   PetscFunctionReturn(0);
4609 }
4610 
4611 #undef __FUNCT__
4612 #define __FUNCT__ "PCBDDCGlobalToLocal"
4613 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4614 {
4615   IS             localis_t;
4616   PetscInt       i,lsize,*idxs,n;
4617   PetscScalar    *vals;
4618   PetscErrorCode ierr;
4619 
4620   PetscFunctionBegin;
4621   /* get indices in local ordering exploiting local to global map */
4622   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4623   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4624   for (i=0;i<lsize;i++) vals[i] = 1.0;
4625   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4626   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4627   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4628   if (idxs) { /* multilevel guard */
4629     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4630   }
4631   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4632   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4633   ierr = PetscFree(vals);CHKERRQ(ierr);
4634   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4635   /* now compute set in local ordering */
4636   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4637   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4638   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4639   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4640   for (i=0,lsize=0;i<n;i++) {
4641     if (PetscRealPart(vals[i]) > 0.5) {
4642       lsize++;
4643     }
4644   }
4645   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4646   for (i=0,lsize=0;i<n;i++) {
4647     if (PetscRealPart(vals[i]) > 0.5) {
4648       idxs[lsize++] = i;
4649     }
4650   }
4651   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4652   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4653   *localis = localis_t;
4654   PetscFunctionReturn(0);
4655 }
4656 
4657 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4658 #undef __FUNCT__
4659 #define __FUNCT__ "PCBDDCMatMult_Private"
4660 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4661 {
4662   PCBDDCChange_ctx change_ctx;
4663   PetscErrorCode   ierr;
4664 
4665   PetscFunctionBegin;
4666   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4667   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4668   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4669   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4670   PetscFunctionReturn(0);
4671 }
4672 
4673 #undef __FUNCT__
4674 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4675 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4676 {
4677   PCBDDCChange_ctx change_ctx;
4678   PetscErrorCode   ierr;
4679 
4680   PetscFunctionBegin;
4681   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4682   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4683   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4684   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4685   PetscFunctionReturn(0);
4686 }
4687 
4688 #undef __FUNCT__
4689 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4690 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4691 {
4692   PC_IS               *pcis=(PC_IS*)pc->data;
4693   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4694   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4695   Mat                 S_j;
4696   PetscInt            *used_xadj,*used_adjncy;
4697   PetscBool           free_used_adj;
4698   PetscErrorCode      ierr;
4699 
4700   PetscFunctionBegin;
4701   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4702   free_used_adj = PETSC_FALSE;
4703   if (pcbddc->sub_schurs_layers == -1) {
4704     used_xadj = NULL;
4705     used_adjncy = NULL;
4706   } else {
4707     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4708       used_xadj = pcbddc->mat_graph->xadj;
4709       used_adjncy = pcbddc->mat_graph->adjncy;
4710     } else if (pcbddc->computed_rowadj) {
4711       used_xadj = pcbddc->mat_graph->xadj;
4712       used_adjncy = pcbddc->mat_graph->adjncy;
4713     } else {
4714       PetscBool      flg_row=PETSC_FALSE;
4715       const PetscInt *xadj,*adjncy;
4716       PetscInt       nvtxs;
4717 
4718       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4719       if (flg_row) {
4720         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4721         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4722         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4723         free_used_adj = PETSC_TRUE;
4724       } else {
4725         pcbddc->sub_schurs_layers = -1;
4726         used_xadj = NULL;
4727         used_adjncy = NULL;
4728       }
4729       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4730     }
4731   }
4732 
4733   /* setup sub_schurs data */
4734   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4735   if (!sub_schurs->use_mumps) {
4736     /* pcbddc->ksp_D up to date only if not using MUMPS */
4737     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4738     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4739   } else {
4740     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
4741     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4742   }
4743   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4744 
4745   /* free adjacency */
4746   if (free_used_adj) {
4747     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4748   }
4749   PetscFunctionReturn(0);
4750 }
4751 
4752 #undef __FUNCT__
4753 #define __FUNCT__ "PCBDDCInitSubSchurs"
4754 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4755 {
4756   PC_IS               *pcis=(PC_IS*)pc->data;
4757   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4758   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4759   PCBDDCGraph         graph;
4760   PetscErrorCode      ierr;
4761 
4762   PetscFunctionBegin;
4763   /* attach interface graph for determining subsets */
4764   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4765     IS verticesIS;
4766 
4767     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4768     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4769     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4770     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4771     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4772     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4773 /*
4774     if (pcbddc->dbg_flag) {
4775       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4776     }
4777 */
4778   } else {
4779     graph = pcbddc->mat_graph;
4780   }
4781 
4782   /* sub_schurs init */
4783   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4784 
4785   /* free graph struct */
4786   if (pcbddc->sub_schurs_rebuild) {
4787     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4788   }
4789   PetscFunctionReturn(0);
4790 }
4791