xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 686e3a490684ad76a6ce8320d41ecb571c0dc10d)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       cum += subset_size*n;
66       cum2 += n;
67       maxneigs = PetscMax(maxneigs,n);
68     }
69   }
70 
71   if (mss) {
72     if (symmetric) {
73       PetscBLASInt B_itype = 1;
74       PetscBLASInt B_N = mss;
75       PetscScalar  zero = 0.0;
76       PetscScalar  eps = 0.0; /* dlamch? */
77 
78       B_lwork = -1;
79       S = NULL;
80       St = NULL;
81       eigs = NULL;
82       eigv = NULL;
83       B_iwork = NULL;
84       B_ifail = NULL;
85       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
86 #if defined(PETSC_USE_COMPLEX)
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
88 #else
89       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
90 #endif
91       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
92       ierr = PetscFPTrapPop();CHKERRQ(ierr);
93     } else {
94         /* TODO */
95     }
96   } else {
97     lwork = 0;
98   }
99 
100   nv = 0;
101   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
102     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
103   }
104   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
105   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
106                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
107 #if defined(PETSC_USE_COMPLEX)
108   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
109 #endif
110   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
111   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
112                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
113                       nv+cum,&pcbddc->adaptive_constraints_idxs,
114                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
115   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
116 
117   maxneigs = 0;
118   cum = cum2 = cumarray = 0;
119   if (sub_schurs->is_Ej_com) {
120     const PetscInt *idxs;
121 
122     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
123     for (cum=0;cum<nv;cum++) {
124       pcbddc->adaptive_constraints_n[cum] = 1;
125       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
126       pcbddc->adaptive_constraints_ptrs[cum] = cum;
127       pcbddc->adaptive_constraints_data[cum] = 1.0;
128     }
129     cum2 = cum;
130     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
131   }
132 
133   if (mss) { /* multilevel */
134     if (pcbddc->use_deluxe_scaling) {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
136     } else {
137       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
138     }
139     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
140   }
141 
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt j,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
147       const PetscInt *idxs;
148       PetscScalar    one = 1.0,zero=0.0;
149       PetscBLASInt   B_N;
150 
151       /* S should be copied since we need it for deluxe scaling */
152       if (symmetric) {
153         PetscInt j;
154         for (j=0;j<subset_size;j++) {
155           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
156         }
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160       } else {
161         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163       }
164       /* is always this the right matrix? */
165       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166 
167       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
168       /* St = Starray+cumarray; */
169 
170       if (symmetric) {
171         PetscBLASInt B_itype = 1;
172         PetscBLASInt B_IL = 1, B_IU;
173         PetscScalar  eps = -1.0; /* dlamch? */
174         PetscInt     nmin_s;
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         nmin_s = PetscMin(nmin,B_N);
202         if (B_neigs < nmin_s) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
251 
252       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
253       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
254       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255 #if 0
256       PetscInt ii;
257       for (ii=0;ii<B_neigs;ii++) {
258         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
259         for (j=0;j<B_N;j++) {
260           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
261         }
262       }
263 #endif
264       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
265       for (j=0;j<B_neigs;j++) {
266 #if 0
267         {
268           PetscBLASInt Blas_N,Blas_one = 1.0;
269           PetscScalar norm;
270           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
272           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
273             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
274           } else {
275             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
276           }
277           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
278         }
279 #endif
280         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
281         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
282         cum2 += subset_size;
283       }
284       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285     }
286     /* shift for next computation */
287     cumarray += subset_size*subset_size;
288   }
289   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
290   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
291 
292   if (mss) {
293     if (pcbddc->use_deluxe_scaling) {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
295     } else {
296       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
297     }
298     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
299   }
300   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
301 #if defined(PETSC_USE_COMPLEX)
302   ierr = PetscFree(rwork);CHKERRQ(ierr);
303 #endif
304   if (pcbddc->dbg_flag) {
305     PetscInt maxneigs_r;
306     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
307     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "PCBDDCSetUpSolvers"
314 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
315 {
316   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
317   PetscScalar    *coarse_submat_vals;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   /* Setup local scatters R_to_B and (optionally) R_to_D */
322   /* PCBDDCSetUpLocalWorkVectors should be called first! */
323   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
324 
325   /* Setup local neumann solver ksp_R */
326   /* PCBDDCSetUpLocalScatters should be called first! */
327   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
328 
329   /* Change global null space passed in by the user if change of basis has been requested */
330   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
331     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
332   }
333 
334   /*
335      Setup local correction and local part of coarse basis.
336      Gives back the dense local part of the coarse matrix in column major ordering
337   */
338   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
339 
340   /* Compute total number of coarse nodes and setup coarse solver */
341   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
342 
343   /* free */
344   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "PCBDDCResetCustomization"
350 PetscErrorCode PCBDDCResetCustomization(PC pc)
351 {
352   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
358   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
361   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
362   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
363   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
364   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
366   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetTopography"
372 PetscErrorCode PCBDDCResetTopography(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
380   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
381   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
382   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "PCBDDCResetSolvers"
388 PetscErrorCode PCBDDCResetSolvers(PC pc)
389 {
390   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
391   PetscScalar    *array;
392   PetscErrorCode ierr;
393 
394   PetscFunctionBegin;
395   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
396   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
397   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
398   ierr = PetscFree(array);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651 
652   /* Get submatrices from subdomain matrix */
653   if (n_vertices) {
654     Mat       newmat;
655     IS        is_aux;
656     PetscInt  ibs,mbs;
657     PetscBool issbaij;
658 
659     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
660     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
661     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
662     if (ibs != mbs) { /* need to convert to SEQAIJ */
663       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
666       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
667       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
668     } else {
669       /* this is safe */
670       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
671       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
672       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
673         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
674         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
675         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
676         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
740     PetscScalar  *x,*y;
741     PetscBLASInt B_N,B_one = 1;
742 
743     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
744     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
745     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
746     if (F) {
747       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
748     } else {
749       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
750       for (i=0;i<n_vertices;i++) {
751         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
752         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
753         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
754         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
755         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
756       }
757       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
758     }
759     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
760     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
761     if (n_constraints) {
762       Mat B;
763       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
764       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
765       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
766       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
767       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
768       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
769       ierr = MatDestroy(&B);CHKERRQ(ierr);
770     }
771     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
772     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
773     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
774     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
775     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
776     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
777     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
778     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
779     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
780     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
782     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
783     /* coarse basis functions */
784     for (i=0;i<n_vertices;i++) {
785       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
786       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
787       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
788       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
790       y[n_B*i+idx_V_B[i]] = 1.0;
791       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
792       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
793 
794       if (pcbddc->switch_static || pcbddc->dbg_flag) {
795         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
796         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
797         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
799         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
800         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
801       }
802       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
803     }
804     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
805     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
806   }
807 
808   if (n_constraints) {
809     Mat B;
810 
811     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
812     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
813     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
814     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
815     if (n_vertices) {
816       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
817     }
818     ierr = MatDestroy(&B);CHKERRQ(ierr);
819     /* coarse basis functions */
820     for (i=0;i<n_constraints;i++) {
821       PetscScalar *y;
822 
823       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
824       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
825       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
826       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
829       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830 
831       if (pcbddc->switch_static || pcbddc->dbg_flag) {
832         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
833         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
834         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
837         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
838       }
839       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
840     }
841   }
842 
843   /* compute other basis functions for non-symmetric problems */
844   if (!pcbddc->issym) {
845     Mat B,X;
846 
847     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
848 
849     if (n_constraints) {
850       Mat S_VCT,S_CCT,B_C;
851 
852       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
853       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
854       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
855       if (n_vertices) {
856         Mat B_V;
857 
858         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
859         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
860         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
861         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
862       }
863       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
864     }
865     if (n_vertices) {
866       Mat          A_VRT;
867       PetscBLASInt B_N,B_one = 1;
868 
869       if (!n_constraints) { /* if there are no constraints, reset work */
870         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
871       }
872       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
873       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
874       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
875       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
876     }
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
879     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
880       for (i=0;i<pcbddc->local_primal_size;i++) {
881         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
882         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
883         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
884         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
885         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
886       }
887     } else {
888       for (i=0;i<pcbddc->local_primal_size;i++) {
889         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
890         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
891         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
892         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
893         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
894       }
895     }
896     ierr = MatDestroy(&B);CHKERRQ(ierr);
897     /* coarse basis functions */
898     for (i=0;i<pcbddc->local_primal_size;i++) {
899       PetscScalar *y;
900 
901       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
902       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
903       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
904       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906       if (i<n_vertices) {
907         y[n_B*i+idx_V_B[i]] = 1.0;
908       }
909       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911 
912       if (pcbddc->switch_static || pcbddc->dbg_flag) {
913         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
914         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
915         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
918         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
919       }
920       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
921     }
922     ierr = MatDestroy(&X);CHKERRQ(ierr);
923   }
924 
925   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
926   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
927   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
928   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
929   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
930   /* Checking coarse_sub_mat and coarse basis functios */
931   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
932   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
933   if (pcbddc->dbg_flag) {
934     Mat         coarse_sub_mat;
935     Mat         AUXMAT,TM1,TM2,TM3,TM4;
936     Mat         coarse_phi_D,coarse_phi_B;
937     Mat         coarse_psi_D,coarse_psi_B;
938     Mat         A_II,A_BB,A_IB,A_BI;
939     MatType     checkmattype=MATSEQAIJ;
940     PetscReal   real_value;
941 
942     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
943     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
944     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
945     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
946     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
947     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
948     if (unsymmetric_check) {
949       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
950       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
951     }
952     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
953 
954     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
955     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957     if (unsymmetric_check) {
958       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
959       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
960       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
961       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
962       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
963       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
964       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
965       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
966       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
967       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
968       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
969       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
970     } else {
971       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
972       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
973       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
974       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
975       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
976       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
977       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
978       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
979     }
980     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
981     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
982     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
983     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
984     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
985     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
986 
987     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
988     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
989     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
990     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
991     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
992     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
993     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
994     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
995     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
996     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
997     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
998     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
999     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1000     if (unsymmetric_check) {
1001       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1002       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1003     }
1004     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1005   }
1006 
1007   /* free memory */
1008   ierr = PetscFree(work);CHKERRQ(ierr);
1009   if (n_vertices) {
1010     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1011   }
1012   if (n_constraints) {
1013     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1014   }
1015   /* get back data */
1016   *coarse_submat_vals_n = coarse_submat_vals;
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1022 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1023 {
1024   Mat            *work_mat;
1025   IS             isrow_s,iscol_s;
1026   PetscBool      rsorted,csorted;
1027   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1028   PetscErrorCode ierr;
1029 
1030   PetscFunctionBegin;
1031   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1032   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1033   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1034   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1035 
1036   if (!rsorted) {
1037     const PetscInt *idxs;
1038     PetscInt *idxs_sorted,i;
1039 
1040     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1041     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1042     for (i=0;i<rsize;i++) {
1043       idxs_perm_r[i] = i;
1044     }
1045     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1046     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1047     for (i=0;i<rsize;i++) {
1048       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1049     }
1050     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1051     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1052   } else {
1053     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1054     isrow_s = isrow;
1055   }
1056 
1057   if (!csorted) {
1058     if (isrow == iscol) {
1059       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1060       iscol_s = isrow_s;
1061     } else {
1062       const PetscInt *idxs;
1063       PetscInt *idxs_sorted,i;
1064 
1065       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1066       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1067       for (i=0;i<csize;i++) {
1068         idxs_perm_c[i] = i;
1069       }
1070       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1071       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1072       for (i=0;i<csize;i++) {
1073         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1074       }
1075       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1076       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1077     }
1078   } else {
1079     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1080     iscol_s = iscol;
1081   }
1082 
1083   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1084 
1085   if (!rsorted || !csorted) {
1086     Mat      new_mat;
1087     IS       is_perm_r,is_perm_c;
1088 
1089     if (!rsorted) {
1090       PetscInt *idxs_r,i;
1091       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1092       for (i=0;i<rsize;i++) {
1093         idxs_r[idxs_perm_r[i]] = i;
1094       }
1095       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1096       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1097     } else {
1098       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1099     }
1100     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1101 
1102     if (!csorted) {
1103       if (isrow_s == iscol_s) {
1104         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1105         is_perm_c = is_perm_r;
1106       } else {
1107         PetscInt *idxs_c,i;
1108         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1109         for (i=0;i<csize;i++) {
1110           idxs_c[idxs_perm_c[i]] = i;
1111         }
1112         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1113         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1114       }
1115     } else {
1116       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1117     }
1118     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1119 
1120     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1121     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1122     work_mat[0] = new_mat;
1123     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1124     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1125   }
1126 
1127   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1128   *B = work_mat[0];
1129   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1130   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1131   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1137 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1138 {
1139   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1140   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1141   Mat            new_mat;
1142   IS             is_local,is_global;
1143   PetscInt       local_size;
1144   PetscBool      isseqaij;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1149   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1150   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1151   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1152   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1153   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1154   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1155 
1156   /* check */
1157   if (pcbddc->dbg_flag) {
1158     Vec       x,x_change;
1159     PetscReal error;
1160 
1161     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1162     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1163     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1164     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1167     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1168     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1170     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1171     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1172     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1173     ierr = VecDestroy(&x);CHKERRQ(ierr);
1174     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1175   }
1176 
1177   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1178   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1179   if (isseqaij) {
1180     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1181   } else {
1182     Mat work_mat;
1183     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1184     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1185     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1186   }
1187   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1188   /*
1189   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1190   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1191   */
1192   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1198 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1199 {
1200   PC_IS*         pcis = (PC_IS*)(pc->data);
1201   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1202   IS             is_aux1,is_aux2;
1203   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1204   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1205   PetscInt       vbs,bs;
1206   PetscBT        bitmask;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   /*
1211     No need to setup local scatters if
1212       - primal space is unchanged
1213         AND
1214       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1215         AND
1216       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1217   */
1218   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1219     PetscFunctionReturn(0);
1220   }
1221   /* destroy old objects */
1222   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1223   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1224   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1225   /* Set Non-overlapping dimensions */
1226   n_B = pcis->n_B; n_D = pcis->n - n_B;
1227   n_vertices = pcbddc->n_actual_vertices;
1228   /* create auxiliary bitmask */
1229   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1230   for (i=0;i<n_vertices;i++) {
1231     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1232   }
1233 
1234   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1235   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1236   for (i=0, n_R=0; i<pcis->n; i++) {
1237     if (!PetscBTLookup(bitmask,i)) {
1238       idx_R_local[n_R] = i;
1239       n_R++;
1240     }
1241   }
1242 
1243   /* Block code */
1244   vbs = 1;
1245   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1246   if (bs>1 && !(n_vertices%bs)) {
1247     PetscBool is_blocked = PETSC_TRUE;
1248     PetscInt  *vary;
1249     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1250     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1251     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1252     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1253     for (i=0; i<n_vertices; i++) {
1254       if (vary[i]!=0 && vary[i]!=bs) {
1255         is_blocked = PETSC_FALSE;
1256         break;
1257       }
1258     }
1259     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1260       vbs = bs;
1261       for (i=0;i<n_R/vbs;i++) {
1262         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1263       }
1264     }
1265     ierr = PetscFree(vary);CHKERRQ(ierr);
1266   }
1267   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1268   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1269 
1270   /* print some info if requested */
1271   if (pcbddc->dbg_flag) {
1272     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1273     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1274     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1275     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1276     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1277     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1278     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1279     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1280   }
1281 
1282   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1283   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1284   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1285   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1286   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1287   for (i=0; i<n_D; i++) {
1288     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1289   }
1290   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1291   for (i=0, j=0; i<n_R; i++) {
1292     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1293       aux_array1[j++] = i;
1294     }
1295   }
1296   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0, j=0; i<n_B; i++) {
1299     if (!PetscBTLookup(bitmask,is_indices[i])) {
1300       aux_array2[j++] = i;
1301     }
1302   }
1303   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1304   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1305   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1306   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1307   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1308 
1309   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1310     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1311     for (i=0, j=0; i<n_R; i++) {
1312       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1313         aux_array1[j++] = i;
1314       }
1315     }
1316     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1317     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1318     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1319   }
1320   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1321   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1328 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1329 {
1330   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1331   PC_IS          *pcis = (PC_IS*)pc->data;
1332   PC             pc_temp;
1333   Mat            A_RR;
1334   MatReuse       reuse;
1335   PetscScalar    m_one = -1.0;
1336   PetscReal      value;
1337   PetscInt       n_D,n_R,ibs,mbs;
1338   PetscBool      use_exact,use_exact_reduced,issbaij;
1339   PetscErrorCode ierr;
1340   /* prefixes stuff */
1341   char           dir_prefix[256],neu_prefix[256],str_level[16];
1342   size_t         len;
1343 
1344   PetscFunctionBegin;
1345 
1346   /* compute prefixes */
1347   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1348   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1349   if (!pcbddc->current_level) {
1350     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1351     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1352     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1353     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1356     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1357     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1358     len -= 15; /* remove "pc_bddc_coarse_" */
1359     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1360     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1361     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1362     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1366     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1367   }
1368 
1369   /* DIRICHLET PROBLEM */
1370   if (dirichlet) {
1371     /* Matrix for Dirichlet problem is pcis->A_II */
1372     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1373     if (!pcbddc->ksp_D) { /* create object if not yet build */
1374       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1375       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1376       /* default */
1377       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1378       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1379       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1380       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1381       if (issbaij) {
1382         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1383       } else {
1384         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1385       }
1386       /* Allow user's customization */
1387       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1388       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1389     }
1390     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1391     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1392     if (!n_D) {
1393       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1394       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1395     }
1396     /* Set Up KSP for Dirichlet problem of BDDC */
1397     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1398     /* set ksp_D into pcis data */
1399     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1400     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1401     pcis->ksp_D = pcbddc->ksp_D;
1402   }
1403 
1404   /* NEUMANN PROBLEM */
1405   A_RR = 0;
1406   if (neumann) {
1407     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1408     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1409     if (pcbddc->ksp_R) { /* already created ksp */
1410       PetscInt nn_R;
1411       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1412       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1413       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1414       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1415         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1416         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1417         reuse = MAT_INITIAL_MATRIX;
1418       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1419         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1420           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1421           reuse = MAT_INITIAL_MATRIX;
1422         } else { /* safe to reuse the matrix */
1423           reuse = MAT_REUSE_MATRIX;
1424         }
1425       }
1426       /* last check */
1427       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1428         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1429         reuse = MAT_INITIAL_MATRIX;
1430       }
1431     } else { /* first time, so we need to create the matrix */
1432       reuse = MAT_INITIAL_MATRIX;
1433     }
1434     /* extract A_RR */
1435     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1436     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1437     if (ibs != mbs) {
1438       Mat newmat;
1439       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1440       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1441       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1442     } else {
1443       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1444     }
1445     if (!pcbddc->ksp_R) { /* create object if not present */
1446       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1447       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1448       /* default */
1449       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1450       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1451       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1452       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1453       if (issbaij) {
1454         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1455       } else {
1456         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1457       }
1458       /* Allow user's customization */
1459       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1460       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1461     }
1462     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1463     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1464     if (!n_R) {
1465       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1466       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1467     }
1468     /* Set Up KSP for Neumann problem of BDDC */
1469     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1470   }
1471 
1472   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1473   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1474     if (pcbddc->dbg_flag) {
1475       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1478     }
1479     if (dirichlet) { /* Dirichlet */
1480       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1481       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1482       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1483       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1484       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1485       /* need to be adapted? */
1486       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1487       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1488       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1489       /* print info */
1490       if (pcbddc->dbg_flag) {
1491         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1492         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1493       }
1494       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1495         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1496       }
1497     }
1498     if (neumann) { /* Neumann */
1499       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1500       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1501       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1502       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1503       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1504       /* need to be adapted? */
1505       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1506       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1507       /* print info */
1508       if (pcbddc->dbg_flag) {
1509         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1510         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1511       }
1512       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1513         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1514       }
1515     }
1516   }
1517   /* free Neumann problem's matrix */
1518   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1524 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1525 {
1526   PetscErrorCode ierr;
1527   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1528 
1529   PetscFunctionBegin;
1530   if (applytranspose) {
1531     if (pcbddc->local_auxmat1) {
1532       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1533       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1534     }
1535     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1536   } else {
1537     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1538     if (pcbddc->local_auxmat1) {
1539       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1540       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1547 #undef __FUNCT__
1548 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1549 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1550 {
1551   PetscErrorCode ierr;
1552   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1553   PC_IS*            pcis = (PC_IS*)  (pc->data);
1554   const PetscScalar zero = 0.0;
1555 
1556   PetscFunctionBegin;
1557   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1558   if (applytranspose) {
1559     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1560     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1561   } else {
1562     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1563     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1564   }
1565   /* start communications from local primal nodes to rhs of coarse solver */
1566   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1567   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1571   /* TODO remove null space when doing multilevel */
1572   if (pcbddc->coarse_ksp) {
1573     if (applytranspose) {
1574       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1575     } else {
1576       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1577     }
1578   }
1579 
1580   /* Local solution on R nodes */
1581   if (pcis->n) {
1582     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1583     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1584     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1585     if (pcbddc->switch_static) {
1586       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1588     }
1589     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1590     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1591     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593     if (pcbddc->switch_static) {
1594       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596     }
1597   }
1598 
1599   /* communications from coarse sol to local primal nodes */
1600   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1601   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1602 
1603   /* Sum contributions from two levels */
1604   if (applytranspose) {
1605     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1606     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1607   } else {
1608     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1609     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1615 #undef __FUNCT__
1616 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1617 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1618 {
1619   PetscErrorCode ierr;
1620   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1621   PetscScalar    *array,*array2;
1622   Vec            from,to;
1623 
1624   PetscFunctionBegin;
1625   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1626     from = pcbddc->coarse_vec;
1627     to = pcbddc->vec1_P;
1628     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1629       Vec tvec;
1630       PetscInt lsize;
1631       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1632       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1633       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1634       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1635       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1636       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1637       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1638     }
1639   } else { /* from local to global -> put data in coarse right hand side */
1640     from = pcbddc->vec1_P;
1641     to = pcbddc->coarse_vec;
1642   }
1643   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1649 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1650 {
1651   PetscErrorCode ierr;
1652   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1653   PetscScalar    *array,*array2;
1654   Vec            from,to;
1655 
1656   PetscFunctionBegin;
1657   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1658     from = pcbddc->coarse_vec;
1659     to = pcbddc->vec1_P;
1660   } else { /* from local to global -> put data in coarse right hand side */
1661     from = pcbddc->vec1_P;
1662     to = pcbddc->coarse_vec;
1663   }
1664   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1665   if (smode == SCATTER_FORWARD) {
1666     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1667       Vec tvec;
1668       PetscInt lsize;
1669       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1670       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1671       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1672       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1673       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1674       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1675       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1676     }
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 /* uncomment for testing purposes */
1682 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1683 #undef __FUNCT__
1684 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1685 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1686 {
1687   PetscErrorCode    ierr;
1688   PC_IS*            pcis = (PC_IS*)(pc->data);
1689   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1690   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1691   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1692   MatType           impMatType=MATSEQAIJ;
1693   /* one and zero */
1694   PetscScalar       one=1.0,zero=0.0;
1695   /* space to store constraints and their local indices */
1696   PetscScalar       *temp_quadrature_constraint;
1697   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1698   /* iterators */
1699   PetscInt          i,j,k,total_counts,temp_start_ptr;
1700   /* stuff to store connected components stored in pcbddc->mat_graph */
1701   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1702   PetscInt          n_ISForFaces,n_ISForEdges;
1703   /* near null space stuff */
1704   MatNullSpace      nearnullsp;
1705   const Vec         *nearnullvecs;
1706   Vec               *localnearnullsp;
1707   PetscBool         nnsp_has_cnst;
1708   PetscInt          nnsp_size;
1709   PetscScalar       *array;
1710   /* BLAS integers */
1711   PetscBLASInt      lwork,lierr;
1712   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1713   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1714   /* LAPACK working arrays for SVD or POD */
1715   PetscBool         skip_lapack;
1716   PetscScalar       *work;
1717   PetscReal         *singular_vals;
1718 #if defined(PETSC_USE_COMPLEX)
1719   PetscReal         *rwork;
1720 #endif
1721 #if defined(PETSC_MISSING_LAPACK_GESVD)
1722   PetscBLASInt      Blas_one_2=1;
1723   PetscScalar       *temp_basis,*correlation_mat;
1724 #else
1725   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1726   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1727 #endif
1728   /* reuse */
1729   PetscInt          olocal_primal_size;
1730   PetscInt          *oprimal_indices_local_idxs;
1731   /* change of basis */
1732   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1733   PetscBool         boolforchange,qr_needed;
1734   PetscBT           touched,change_basis,qr_needed_idx;
1735   /* auxiliary stuff */
1736   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1737   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1738   PetscScalar       *temp_quadrature_constraint_work=NULL;
1739   /* some quantities */
1740   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1741   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1742 
1743 
1744   PetscFunctionBegin;
1745   /* Destroy Mat objects computed previously */
1746   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1747   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1748   /* TODO synch with adaptive selection */
1749   /* Get index sets for faces, edges and vertices from graph */
1750   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1751   /* free unneeded index sets */
1752   if (!pcbddc->use_vertices) {
1753     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1754   }
1755   if (!pcbddc->use_edges) {
1756     for (i=0;i<n_ISForEdges;i++) {
1757       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1758     }
1759     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1760     n_ISForEdges = 0;
1761   }
1762   if (!pcbddc->use_faces) {
1763     for (i=0;i<n_ISForFaces;i++) {
1764       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1765     }
1766     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1767     n_ISForFaces = 0;
1768   }
1769   /* HACKS (the following two blocks of code) */
1770   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1771     pcbddc->use_change_of_basis = PETSC_TRUE;
1772     if (!ISForEdges) {
1773       pcbddc->use_change_on_faces = PETSC_TRUE;
1774     }
1775   }
1776   if (pcbddc->NullSpace) {
1777     /* use_change_of_basis should be consistent among processors */
1778     PetscBool tbool[2],gbool[2];
1779     tbool [0] = pcbddc->use_change_of_basis;
1780     tbool [1] = pcbddc->use_change_on_faces;
1781     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1782     pcbddc->use_change_of_basis = gbool[0];
1783     pcbddc->use_change_on_faces = gbool[1];
1784   }
1785   /* print some info */
1786   if (pcbddc->dbg_flag) {
1787     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1788     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1789     i = 0;
1790     if (ISForVertices) {
1791       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1792     }
1793     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1794     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1796     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1797   }
1798 
1799   if (!pcbddc->adaptive_selection) {
1800     /* check if near null space is attached to global mat */
1801     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1802     if (nearnullsp) {
1803       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1804       /* remove any stored info */
1805       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1806       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1807       /* store information for BDDC solver reuse */
1808       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1809       pcbddc->onearnullspace = nearnullsp;
1810       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1811       for (i=0;i<nnsp_size;i++) {
1812         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1813       }
1814     } else { /* if near null space is not provided BDDC uses constants by default */
1815       nnsp_size = 0;
1816       nnsp_has_cnst = PETSC_TRUE;
1817     }
1818     /* get max number of constraints on a single cc */
1819     max_constraints = nnsp_size;
1820     if (nnsp_has_cnst) max_constraints++;
1821 
1822     /*
1823          Evaluate maximum storage size needed by the procedure
1824          - temp_indices will contain start index of each constraint stored as follows
1825          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1826          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1827          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1828                                                                                                                                                            */
1829     total_counts = n_ISForFaces+n_ISForEdges;
1830     total_counts *= max_constraints;
1831     n_vertices = 0;
1832     if (ISForVertices) {
1833       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1834     }
1835     total_counts += n_vertices;
1836     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1837     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1838     total_counts = 0;
1839     max_size_of_constraint = 0;
1840     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1841       if (i<n_ISForEdges) {
1842         used_IS = &ISForEdges[i];
1843       } else {
1844         used_IS = &ISForFaces[i-n_ISForEdges];
1845       }
1846       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1847       total_counts += j;
1848       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1849     }
1850     total_counts *= max_constraints;
1851     total_counts += n_vertices;
1852     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1853     /* get local part of global near null space vectors */
1854     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1855     for (k=0;k<nnsp_size;k++) {
1856       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1857       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1858       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859     }
1860 
1861     /* whether or not to skip lapack calls */
1862     skip_lapack = PETSC_TRUE;
1863     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1864 
1865     /* allocate some auxiliary stuff */
1866     if (!skip_lapack || pcbddc->use_qr_single) {
1867       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1868     }
1869 
1870     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1871     if (!skip_lapack) {
1872       PetscScalar temp_work;
1873 
1874 #if defined(PETSC_MISSING_LAPACK_GESVD)
1875       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1876       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1877       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1878       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1879 #if defined(PETSC_USE_COMPLEX)
1880       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1881 #endif
1882       /* now we evaluate the optimal workspace using query with lwork=-1 */
1883       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1884       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1885       lwork = -1;
1886       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1887 #if !defined(PETSC_USE_COMPLEX)
1888       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1889 #else
1890       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1891 #endif
1892       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1893       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1894 #else /* on missing GESVD */
1895       /* SVD */
1896       PetscInt max_n,min_n;
1897       max_n = max_size_of_constraint;
1898       min_n = max_constraints;
1899       if (max_size_of_constraint < max_constraints) {
1900         min_n = max_size_of_constraint;
1901         max_n = max_constraints;
1902       }
1903       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1904 #if defined(PETSC_USE_COMPLEX)
1905       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1906 #endif
1907       /* now we evaluate the optimal workspace using query with lwork=-1 */
1908       lwork = -1;
1909       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1910       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1911       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1912       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1913 #if !defined(PETSC_USE_COMPLEX)
1914       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1915 #else
1916       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1917 #endif
1918       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1919       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1920 #endif /* on missing GESVD */
1921       /* Allocate optimal workspace */
1922       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1923       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1924     }
1925     /* Now we can loop on constraining sets */
1926     total_counts = 0;
1927     temp_indices[0] = 0;
1928     /* vertices */
1929     if (ISForVertices) {
1930       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1931       if (nnsp_has_cnst) { /* consider all vertices */
1932         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1933         for (i=0;i<n_vertices;i++) {
1934           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1935           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1936           total_counts++;
1937         }
1938       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1939         PetscBool used_vertex;
1940         for (i=0;i<n_vertices;i++) {
1941           used_vertex = PETSC_FALSE;
1942           k = 0;
1943           while (!used_vertex && k<nnsp_size) {
1944             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1945             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1946               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1947               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1948               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1949               total_counts++;
1950               used_vertex = PETSC_TRUE;
1951             }
1952             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1953             k++;
1954           }
1955         }
1956       }
1957       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1958       n_vertices = total_counts;
1959     }
1960 
1961     /* edges and faces */
1962     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1963       if (ncc<n_ISForEdges) {
1964         used_IS = &ISForEdges[ncc];
1965         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1966       } else {
1967         used_IS = &ISForFaces[ncc-n_ISForEdges];
1968         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1969       }
1970       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1971       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1972       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1973       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1974       /* change of basis should not be performed on local periodic nodes */
1975       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1976       if (nnsp_has_cnst) {
1977         PetscScalar quad_value;
1978         temp_constraints++;
1979         if (!pcbddc->use_nnsp_true) {
1980           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1981         } else {
1982           quad_value = 1.0;
1983         }
1984         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1985         for (j=0;j<size_of_constraint;j++) {
1986           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1987         }
1988         /* sort by global ordering if using lapack subroutines (not needed!) */
1989         if (!skip_lapack || pcbddc->use_qr_single) {
1990           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1991           for (j=0;j<size_of_constraint;j++) {
1992             permutation[j]=j;
1993           }
1994           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1995           for (j=0;j<size_of_constraint;j++) {
1996             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1997           }
1998           for (j=0;j<size_of_constraint;j++) {
1999             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2000             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2001           }
2002           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2003           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2004         }
2005         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2006         total_counts++;
2007       }
2008       for (k=0;k<nnsp_size;k++) {
2009         PetscReal real_value;
2010         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2011         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2012         for (j=0;j<size_of_constraint;j++) {
2013           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2014         }
2015         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2016         /* check if array is null on the connected component */
2017         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2018         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2019         if (real_value > 0.0) { /* keep indices and values */
2020           /* sort by global ordering if using lapack subroutines */
2021           if (!skip_lapack || pcbddc->use_qr_single) {
2022             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2023             for (j=0;j<size_of_constraint;j++) {
2024               permutation[j]=j;
2025             }
2026             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2027             for (j=0;j<size_of_constraint;j++) {
2028               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2029               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2030             }
2031             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2032             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2033           }
2034           temp_constraints++;
2035           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2036           total_counts++;
2037         }
2038       }
2039       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2040       valid_constraints = temp_constraints;
2041       if (!pcbddc->use_nnsp_true && temp_constraints) {
2042         if (temp_constraints == 1) { /* just normalize the constraint */
2043           PetscScalar norm;
2044           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2045           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2046           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2047           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2048         } else { /* perform SVD */
2049           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2050 
2051 #if defined(PETSC_MISSING_LAPACK_GESVD)
2052           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2053              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2054              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2055                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2056                 from that computed using LAPACKgesvd
2057              -> This is due to a different computation of eigenvectors in LAPACKheev
2058              -> The quality of the POD-computed basis will be the same */
2059           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2060           /* Store upper triangular part of correlation matrix */
2061           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2062           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2063           for (j=0;j<temp_constraints;j++) {
2064             for (k=0;k<j+1;k++) {
2065               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2066             }
2067           }
2068           /* compute eigenvalues and eigenvectors of correlation matrix */
2069           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2070           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2071 #if !defined(PETSC_USE_COMPLEX)
2072           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2073 #else
2074           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2075 #endif
2076           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2077           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2078           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2079           j = 0;
2080           while (j < temp_constraints && singular_vals[j] < tol) j++;
2081           total_counts = total_counts-j;
2082           valid_constraints = temp_constraints-j;
2083           /* scale and copy POD basis into used quadrature memory */
2084           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2085           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2086           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2087           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2088           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2089           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2090           if (j<temp_constraints) {
2091             PetscInt ii;
2092             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2093             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2095             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2096             for (k=0;k<temp_constraints-j;k++) {
2097               for (ii=0;ii<size_of_constraint;ii++) {
2098                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2099               }
2100             }
2101           }
2102 #else  /* on missing GESVD */
2103           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2104           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2105           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2106           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2107 #if !defined(PETSC_USE_COMPLEX)
2108           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2109 #else
2110           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2111 #endif
2112           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2113           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2114           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2115           k = temp_constraints;
2116           if (k > size_of_constraint) k = size_of_constraint;
2117           j = 0;
2118           while (j < k && singular_vals[k-j-1] < tol) j++;
2119           valid_constraints = k-j;
2120           total_counts = total_counts-temp_constraints+valid_constraints;
2121 #endif /* on missing GESVD */
2122         }
2123       }
2124       /* setting change_of_basis flag is safe now */
2125       if (boolforchange) {
2126         for (j=0;j<valid_constraints;j++) {
2127           PetscBTSet(change_basis,total_counts-j-1);
2128         }
2129       }
2130     }
2131     /* free workspace */
2132     if (!skip_lapack || pcbddc->use_qr_single) {
2133       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2134     }
2135     if (!skip_lapack) {
2136       ierr = PetscFree(work);CHKERRQ(ierr);
2137 #if defined(PETSC_USE_COMPLEX)
2138       ierr = PetscFree(rwork);CHKERRQ(ierr);
2139 #endif
2140       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2141 #if defined(PETSC_MISSING_LAPACK_GESVD)
2142       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2143       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2144 #endif
2145     }
2146     for (k=0;k<nnsp_size;k++) {
2147       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2148     }
2149     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2150   } else {
2151     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2152     PetscInt        cum = 0;
2153 
2154     total_counts = 0;
2155     n_vertices = 0;
2156     if (sub_schurs->is_Ej_com) {
2157       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2158     }
2159     max_constraints = 0;
2160     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2161       total_counts += pcbddc->adaptive_constraints_n[i];
2162       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2163     }
2164     temp_indices = pcbddc->adaptive_constraints_ptrs;
2165     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2166     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2167 
2168 #if 0
2169     printf("Found %d totals\n",total_counts);
2170     for (i=0;i<total_counts;i++) {
2171       printf("const %d, start %d",i,temp_indices[i]);
2172       printf(" end %d:\n",temp_indices[i+1]);
2173       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2174         printf("  idxs %d",temp_indices_to_constraint[j]);
2175         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2176       }
2177     }
2178     for (i=0;i<n_vertices;i++) {
2179       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2180     }
2181     for (i=0;i<sub_schurs->n_subs;i++) {
2182       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2183     }
2184 #endif
2185 
2186     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2187     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2188     /* Change of basis */
2189     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2190     if (pcbddc->use_change_of_basis) {
2191       cum = n_vertices;
2192       for (i=0;i<sub_schurs->n_subs;i++) {
2193         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2194           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2195             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2196           }
2197         }
2198         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2199       }
2200     }
2201   }
2202 
2203   /* free index sets of faces, edges and vertices */
2204   for (i=0;i<n_ISForFaces;i++) {
2205     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2206   }
2207   if (n_ISForFaces) {
2208     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2209   }
2210   for (i=0;i<n_ISForEdges;i++) {
2211     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2212   }
2213   if (n_ISForEdges) {
2214     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2215   }
2216   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2217 
2218   /* map temp_indices_to_constraint in boundary numbering */
2219   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2220   if (i != temp_indices[total_counts]) {
2221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2222   }
2223 
2224   /* set quantities in pcbddc data structure and store previous primal size */
2225   /* n_vertices defines the number of subdomain corners in the primal space */
2226   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2227   olocal_primal_size = pcbddc->local_primal_size;
2228   pcbddc->local_primal_size = total_counts;
2229   pcbddc->n_vertices = n_vertices;
2230   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2231 
2232   /* Create constraint matrix */
2233   /* The constraint matrix is used to compute the l2g map of primal dofs */
2234   /* so we need to set it up properly either with or without change of basis */
2235   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2236   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2237   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2238   /* array to compute a local numbering of constraints : vertices first then constraints */
2239   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2240   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2241   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2242   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2243   /* auxiliary stuff for basis change */
2244   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2245   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2246 
2247   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2248   total_primal_vertices=0;
2249   for (i=0;i<pcbddc->local_primal_size;i++) {
2250     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2251     if (size_of_constraint == 1) {
2252       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2253       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2254       aux_primal_minloc[total_primal_vertices]=0;
2255       total_primal_vertices++;
2256     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2257       PetscInt min_loc,min_index;
2258       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2259       /* find first untouched local node */
2260       k = 0;
2261       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2262       min_index = global_indices[k];
2263       min_loc = k;
2264       /* search the minimum among global nodes already untouched on the cc */
2265       for (k=1;k<size_of_constraint;k++) {
2266         /* there can be more than one constraint on a single connected component */
2267         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2268           min_index = global_indices[k];
2269           min_loc = k;
2270         }
2271       }
2272       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2273       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2274       aux_primal_minloc[total_primal_vertices]=min_loc;
2275       total_primal_vertices++;
2276     }
2277   }
2278   /* determine if a QR strategy is needed for change of basis */
2279   qr_needed = PETSC_FALSE;
2280   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2281   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2282     if (PetscBTLookup(change_basis,i)) {
2283       if (!pcbddc->use_qr_single) {
2284         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2285         j = 0;
2286         for (k=0;k<size_of_constraint;k++) {
2287           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2288             j++;
2289           }
2290         }
2291         /* found more than one primal dof on the cc */
2292         if (j > 1) {
2293           PetscBTSet(qr_needed_idx,i);
2294           qr_needed = PETSC_TRUE;
2295         }
2296       } else {
2297         PetscBTSet(qr_needed_idx,i);
2298         qr_needed = PETSC_TRUE;
2299       }
2300     }
2301   }
2302   /* free workspace */
2303   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2304 
2305   /* permute indices in order to have a sorted set of vertices */
2306   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2307 
2308   /* nonzero structure of constraint matrix */
2309   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2310   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2311   j=total_primal_vertices;
2312   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2313     if (!PetscBTLookup(change_basis,i)) {
2314       nnz[j]=temp_indices[i+1]-temp_indices[i];
2315       j++;
2316     }
2317   }
2318   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2319   ierr = PetscFree(nnz);CHKERRQ(ierr);
2320   /* set values in constraint matrix */
2321   for (i=0;i<total_primal_vertices;i++) {
2322     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2323   }
2324   total_counts = total_primal_vertices;
2325   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2326     if (!PetscBTLookup(change_basis,i)) {
2327       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2328       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2329       total_counts++;
2330     }
2331   }
2332   /* assembling */
2333   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   /*
2336   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2337   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2338   */
2339   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2340   if (pcbddc->use_change_of_basis) {
2341     /* dual and primal dofs on a single cc */
2342     PetscInt     dual_dofs,primal_dofs;
2343     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2344     PetscInt     primal_counter;
2345     /* working stuff for GEQRF */
2346     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2347     PetscBLASInt lqr_work;
2348     /* working stuff for UNGQR */
2349     PetscScalar  *gqr_work,lgqr_work_t;
2350     PetscBLASInt lgqr_work;
2351     /* working stuff for TRTRS */
2352     PetscScalar  *trs_rhs;
2353     PetscBLASInt Blas_NRHS;
2354     /* pointers for values insertion into change of basis matrix */
2355     PetscInt     *start_rows,*start_cols;
2356     PetscScalar  *start_vals;
2357     /* working stuff for values insertion */
2358     PetscBT      is_primal;
2359     /* matrix sizes */
2360     PetscInt     global_size,local_size;
2361     /* temporary change of basis */
2362     Mat          localChangeOfBasisMatrix;
2363 
2364     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2365     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2366     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2367     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2368 
2369     /* nonzeros for local mat */
2370     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2371     for (i=0;i<pcis->n;i++) nnz[i]=1;
2372     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373       if (PetscBTLookup(change_basis,i)) {
2374         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2375         if (PetscBTLookup(qr_needed_idx,i)) {
2376           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2377         } else {
2378           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2379           /* get local primal index on the cc */
2380           j = 0;
2381           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2382           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2383         }
2384       }
2385     }
2386     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2387     ierr = PetscFree(nnz);CHKERRQ(ierr);
2388     /* Set initial identity in the matrix */
2389     for (i=0;i<pcis->n;i++) {
2390       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2391     }
2392 
2393     if (pcbddc->dbg_flag) {
2394       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2395       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2396     }
2397 
2398 
2399     /* Now we loop on the constraints which need a change of basis */
2400     /*
2401        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2402        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2403 
2404        Basic blocks of change of basis matrix T computed by
2405 
2406           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2407 
2408             | 1        0   ...        0         s_1/S |
2409             | 0        1   ...        0         s_2/S |
2410             |              ...                        |
2411             | 0        ...            1     s_{n-1}/S |
2412             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2413 
2414             with S = \sum_{i=1}^n s_i^2
2415             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2416                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2417 
2418           - QR decomposition of constraints otherwise
2419     */
2420     if (qr_needed) {
2421       /* space to store Q */
2422       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2423       /* first we issue queries for optimal work */
2424       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2425       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2426       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2427       lqr_work = -1;
2428       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2429       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2430       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2431       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2432       lgqr_work = -1;
2433       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2434       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2435       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2436       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2437       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2438       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2439       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2440       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2441       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2442       /* array to store scaling factors for reflectors */
2443       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2444       /* array to store rhs and solution of triangular solver */
2445       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2446       /* allocating workspace for check */
2447       if (pcbddc->dbg_flag) {
2448         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2449       }
2450     }
2451     /* array to store whether a node is primal or not */
2452     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2453     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2454     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2455     if (i != total_primal_vertices) {
2456       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2457     }
2458     for (i=0;i<total_primal_vertices;i++) {
2459       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2460     }
2461     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2462 
2463     /* loop on constraints and see whether or not they need a change of basis and compute it */
2464     /* -> using implicit ordering contained in temp_indices data */
2465     total_counts = pcbddc->n_vertices;
2466     primal_counter = total_counts;
2467     while (total_counts<pcbddc->local_primal_size) {
2468       primal_dofs = 1;
2469       if (PetscBTLookup(change_basis,total_counts)) {
2470         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2471         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2472           primal_dofs++;
2473         }
2474         /* get constraint info */
2475         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2476         dual_dofs = size_of_constraint-primal_dofs;
2477 
2478         if (pcbddc->dbg_flag) {
2479           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2480         }
2481 
2482         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2483 
2484           /* copy quadrature constraints for change of basis check */
2485           if (pcbddc->dbg_flag) {
2486             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2487           }
2488           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2489           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2490 
2491           /* compute QR decomposition of constraints */
2492           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2493           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2494           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2495           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2496           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2497           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2498           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2499 
2500           /* explictly compute R^-T */
2501           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2502           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2503           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2504           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2505           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2507           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2508           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2509           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2510           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2511 
2512           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2516           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2517           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2518           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2519           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2520           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2521 
2522           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2523              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2524              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2525           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2528           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2529           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2530           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2531           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2532           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2533           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2534           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2535 
2536           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2537           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2538           /* insert cols for primal dofs */
2539           for (j=0;j<primal_dofs;j++) {
2540             start_vals = &qr_basis[j*size_of_constraint];
2541             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2542             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2543           }
2544           /* insert cols for dual dofs */
2545           for (j=0,k=0;j<dual_dofs;k++) {
2546             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2547               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2548               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2549               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2550               j++;
2551             }
2552           }
2553 
2554           /* check change of basis */
2555           if (pcbddc->dbg_flag) {
2556             PetscInt   ii,jj;
2557             PetscBool valid_qr=PETSC_TRUE;
2558             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2559             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2560             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2561             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2562             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2563             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2564             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2565             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2566             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2567             for (jj=0;jj<size_of_constraint;jj++) {
2568               for (ii=0;ii<primal_dofs;ii++) {
2569                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2570                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2571               }
2572             }
2573             if (!valid_qr) {
2574               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2575               for (jj=0;jj<size_of_constraint;jj++) {
2576                 for (ii=0;ii<primal_dofs;ii++) {
2577                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2578                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2579                   }
2580                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2581                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2582                   }
2583                 }
2584               }
2585             } else {
2586               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2587             }
2588           }
2589         } else { /* simple transformation block */
2590           PetscInt    row,col;
2591           PetscScalar val,norm;
2592 
2593           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2594           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2595           for (j=0;j<size_of_constraint;j++) {
2596             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2597             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2598             if (!PetscBTLookup(is_primal,row_B)) {
2599               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2600               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2601               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2602             } else {
2603               for (k=0;k<size_of_constraint;k++) {
2604                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2605                 if (row != col) {
2606                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2607                 } else {
2608                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2609                 }
2610                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2611               }
2612             }
2613           }
2614           if (pcbddc->dbg_flag) {
2615             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2616           }
2617         }
2618         /* increment primal counter */
2619         primal_counter += primal_dofs;
2620       } else {
2621         if (pcbddc->dbg_flag) {
2622           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2623         }
2624       }
2625       /* increment constraint counter total_counts */
2626       total_counts += primal_dofs;
2627     }
2628 
2629     /* free workspace */
2630     if (qr_needed) {
2631       if (pcbddc->dbg_flag) {
2632         ierr = PetscFree(work);CHKERRQ(ierr);
2633       }
2634       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2635       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2636       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2637       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2638       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2639     }
2640     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2641     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2642     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2643 
2644     /* assembling of global change of variable */
2645     {
2646       Mat tmat;
2647       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2648       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2649       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2650       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2651       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2652       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2653       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2654       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2655       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2656       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2657       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2658       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2659       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2660       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2661       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2662       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2663     }
2664     /* check */
2665     if (pcbddc->dbg_flag) {
2666       PetscReal error;
2667       Vec       x,x_change;
2668 
2669       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2670       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2671       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2672       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2673       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2674       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2675       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2676       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2677       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2678       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2679       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2680       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2681       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2682       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2683       ierr = VecDestroy(&x);CHKERRQ(ierr);
2684       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2685     }
2686 
2687     /* adapt sub_schurs computed (if any) */
2688     if (pcbddc->use_deluxe_scaling) {
2689       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2690       if (sub_schurs->n_subs_par_g) {
2691         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2692       }
2693       if (sub_schurs->S_Ej_all) {
2694         Mat S_1,S_2,tmat;
2695         IS is_all_N;
2696 
2697         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2698         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2699         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2700         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2701         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2702         sub_schurs->S_Ej_all = S_1;
2703         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2704         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2705         sub_schurs->sum_S_Ej_all = S_2;
2706         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2707       }
2708     }
2709     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2710   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2711     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2712     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2713   }
2714 
2715   /* set up change of basis context */
2716   if (pcbddc->ChangeOfBasisMatrix) {
2717     PCBDDCChange_ctx change_ctx;
2718 
2719     if (!pcbddc->new_global_mat) {
2720       PetscInt global_size,local_size;
2721 
2722       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2723       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2724       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2725       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2726       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2727       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2728       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2729       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2730       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2731     } else {
2732       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2733       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2734       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2735     }
2736     if (!pcbddc->user_ChangeOfBasisMatrix) {
2737       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2738       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2739     } else {
2740       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2741       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2742     }
2743     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2744     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2745     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747   }
2748 
2749   /* get indices in local ordering for vertices and constraints */
2750   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2751     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2752     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2753   }
2754   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2755   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2756   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2757   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2758   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2759   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2760   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2761   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2762   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2763   /* set quantities in PCBDDC data struct */
2764   pcbddc->n_actual_vertices = i;
2765   /* check if a new primal space has been introduced */
2766   pcbddc->new_primal_space_local = PETSC_TRUE;
2767   if (olocal_primal_size == pcbddc->local_primal_size) {
2768     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2769     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2770     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2771   }
2772   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2773   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2774 
2775   /* flush dbg viewer */
2776   if (pcbddc->dbg_flag) {
2777     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2778   }
2779 
2780   /* free workspace */
2781   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2782   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2783   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2784   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2785   if (!pcbddc->adaptive_selection) {
2786     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2787     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2788   } else {
2789     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2790                       pcbddc->adaptive_constraints_ptrs,
2791                       pcbddc->adaptive_constraints_idxs,
2792                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2793     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2794   }
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2800 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2801 {
2802   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2803   PC_IS       *pcis = (PC_IS*)pc->data;
2804   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2805   PetscInt    ierr,i,vertex_size;
2806   PetscViewer viewer=pcbddc->dbg_viewer;
2807 
2808   PetscFunctionBegin;
2809   /* Reset previously computed graph */
2810   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2811   /* Init local Graph struct */
2812   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2813 
2814   /* Check validity of the csr graph passed in by the user */
2815   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2816     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2817   }
2818 
2819   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2820   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2821     Mat       mat_adj;
2822     PetscInt  *xadj,*adjncy;
2823     PetscInt  nvtxs;
2824     PetscBool flg_row=PETSC_TRUE;
2825 
2826     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2827     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2828     if (!flg_row) {
2829       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2830     }
2831     if (pcbddc->use_local_adj) {
2832       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2833       pcbddc->computed_rowadj = PETSC_TRUE;
2834     } else { /* just compute subdomain's connected components */
2835       IS                     is_dummy;
2836       ISLocalToGlobalMapping l2gmap_dummy;
2837       PetscInt               j,sum;
2838       PetscInt               *cxadj,*cadjncy;
2839       const PetscInt         *idxs;
2840       PCBDDCGraph            graph;
2841       PetscBT                is_on_boundary;
2842 
2843       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2844       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2845       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2846       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2847       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2848       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2849       graph->xadj = xadj;
2850       graph->adjncy = adjncy;
2851       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2852       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2853 
2854       if (pcbddc->dbg_flag) {
2855         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2856         for (i=0;i<graph->ncc;i++) {
2857           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2858         }
2859         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2860       }
2861 
2862       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2863       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2864       for (i=0;i<pcis->n_B;i++) {
2865         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2866       }
2867       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2868 
2869       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2870       sum = 0;
2871       for (i=0;i<graph->ncc;i++) {
2872         PetscInt sizecc = 0;
2873         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2874           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2875             sizecc++;
2876           }
2877         }
2878         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2879           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2880             cxadj[graph->queue[j]] = sizecc;
2881           }
2882         }
2883         sum += sizecc*sizecc;
2884       }
2885       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2886       sum = 0;
2887       for (i=0;i<nvtxs;i++) {
2888         PetscInt temp = cxadj[i];
2889         cxadj[i] = sum;
2890         sum += temp;
2891       }
2892       cxadj[nvtxs] = sum;
2893       for (i=0;i<graph->ncc;i++) {
2894         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2895           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2896             PetscInt k,sizecc = 0;
2897             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2898               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2899                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2900                 sizecc++;
2901               }
2902             }
2903           }
2904         }
2905       }
2906       if (nvtxs) {
2907         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2908       } else {
2909         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2910         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2911       }
2912       graph->xadj = 0;
2913       graph->adjncy = 0;
2914       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2915       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2916     }
2917     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2918     if (!flg_row) {
2919       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2920     }
2921     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2922   }
2923 
2924   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2925   vertex_size = 1;
2926   if (pcbddc->user_provided_isfordofs) {
2927     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2928       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2929       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2930         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2931         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2932       }
2933       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2934       pcbddc->n_ISForDofs = 0;
2935       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2936     }
2937     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2938     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2939   } else {
2940     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2941       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2942       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2943       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2944         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2945       }
2946     }
2947   }
2948 
2949   /* Setup of Graph */
2950   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2951     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2952   }
2953   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2954     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2955   }
2956   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2957 
2958   /* Graph's connected components analysis */
2959   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2960 
2961   /* print some info to stdout */
2962   if (pcbddc->dbg_flag) {
2963     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2964   }
2965 
2966   /* mark topography has done */
2967   pcbddc->recompute_topography = PETSC_FALSE;
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 #undef __FUNCT__
2972 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2973 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2974 {
2975   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2976   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   n = 0;
2981   vertices = 0;
2982   if (pcbddc->ConstraintMatrix) {
2983     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2984     for (i=0;i<local_primal_size;i++) {
2985       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2986       if (size_of_constraint == 1) n++;
2987       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2988     }
2989     if (vertices_idx) {
2990       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2991       n = 0;
2992       for (i=0;i<local_primal_size;i++) {
2993         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2994         if (size_of_constraint == 1) {
2995           vertices[n++]=row_cmat_indices[0];
2996         }
2997         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2998       }
2999     }
3000   }
3001   *n_vertices = n;
3002   if (vertices_idx) *vertices_idx = vertices;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3008 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3009 {
3010   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3011   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3012   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3013   PetscBT        touched;
3014   PetscErrorCode ierr;
3015 
3016     /* This function assumes that the number of local constraints per connected component
3017        is not greater than the number of nodes defined for the connected component
3018        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3019   PetscFunctionBegin;
3020   n = 0;
3021   constraints_index = 0;
3022   if (pcbddc->ConstraintMatrix) {
3023     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3024     max_size_of_constraint = 0;
3025     for (i=0;i<local_primal_size;i++) {
3026       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3027       if (size_of_constraint > 1) {
3028         n++;
3029       }
3030       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3031       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3032     }
3033     if (constraints_idx) {
3034       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3035       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3036       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3037       n = 0;
3038       for (i=0;i<local_primal_size;i++) {
3039         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3040         if (size_of_constraint > 1) {
3041           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3042           /* find first untouched local node */
3043           j = 0;
3044           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3045           min_index = row_cmat_global_indices[j];
3046           min_loc = j;
3047           /* search the minimum among nodes not yet touched on the connected component
3048              since there can be more than one constraint on a single cc */
3049           for (j=1;j<size_of_constraint;j++) {
3050             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3051               min_index = row_cmat_global_indices[j];
3052               min_loc = j;
3053             }
3054           }
3055           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3056           constraints_index[n++] = row_cmat_indices[min_loc];
3057         }
3058         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3059       }
3060       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3061       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3062     }
3063   }
3064   *n_constraints = n;
3065   if (constraints_idx) *constraints_idx = constraints_index;
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "PCBDDCSubsetNumbering"
3071 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3072 {
3073   Vec            local_vec,global_vec;
3074   IS             seqis,paris;
3075   VecScatter     scatter_ctx;
3076   PetscScalar    *array;
3077   PetscInt       *temp_global_dofs;
3078   PetscScalar    globalsum;
3079   PetscInt       i,j,s;
3080   PetscInt       nlocals,first_index,old_index,max_local;
3081   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3082   PetscMPIInt    *dof_sizes,*dof_displs;
3083   PetscBool      first_found;
3084   PetscErrorCode ierr;
3085 
3086   PetscFunctionBegin;
3087   /* mpi buffers */
3088   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3089   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3090   j = ( !rank_prec_comm ? size_prec_comm : 0);
3091   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3092   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3093   /* get maximum size of subset */
3094   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3095   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3096   max_local = 0;
3097   for (i=0;i<n_local_dofs;i++) {
3098     if (max_local < temp_global_dofs[i] ) {
3099       max_local = temp_global_dofs[i];
3100     }
3101   }
3102   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3103   max_global++;
3104   max_local = 0;
3105   for (i=0;i<n_local_dofs;i++) {
3106     if (max_local < local_dofs[i] ) {
3107       max_local = local_dofs[i];
3108     }
3109   }
3110   max_local++;
3111   /* allocate workspace */
3112   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3113   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3114   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3115   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3116   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3117   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3118   /* create scatter */
3119   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3120   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3121   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3122   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3123   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3124   /* init array */
3125   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3126   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3127   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3128   if (local_dofs_mult) {
3129     for (i=0;i<n_local_dofs;i++) {
3130       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3131     }
3132   } else {
3133     for (i=0;i<n_local_dofs;i++) {
3134       array[local_dofs[i]]=1.0;
3135     }
3136   }
3137   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3138   /* scatter into global vec and get total number of global dofs */
3139   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3140   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3141   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3142   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3143   /* Fill global_vec with cumulative function for global numbering */
3144   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3145   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3146   nlocals = 0;
3147   first_index = -1;
3148   first_found = PETSC_FALSE;
3149   for (i=0;i<s;i++) {
3150     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3151       first_found = PETSC_TRUE;
3152       first_index = i;
3153     }
3154     nlocals += (PetscInt)PetscRealPart(array[i]);
3155   }
3156   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3157   if (!rank_prec_comm) {
3158     dof_displs[0]=0;
3159     for (i=1;i<size_prec_comm;i++) {
3160       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3161     }
3162   }
3163   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3164   if (first_found) {
3165     array[first_index] += (PetscScalar)nlocals;
3166     old_index = first_index;
3167     for (i=first_index+1;i<s;i++) {
3168       if (PetscRealPart(array[i]) > 0.1) {
3169         array[i] += array[old_index];
3170         old_index = i;
3171       }
3172     }
3173   }
3174   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3175   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3176   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3177   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3178   /* get global ordering of local dofs */
3179   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3180   if (local_dofs_mult) {
3181     for (i=0;i<n_local_dofs;i++) {
3182       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3183     }
3184   } else {
3185     for (i=0;i<n_local_dofs;i++) {
3186       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3187     }
3188   }
3189   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3190   /* free workspace */
3191   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3192   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3193   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3194   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3195   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3196   /* return pointer to global ordering of local dofs */
3197   *global_numbering_subset = temp_global_dofs;
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3203 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3204 {
3205   PetscInt       i,j;
3206   PetscScalar    *alphas;
3207   PetscErrorCode ierr;
3208 
3209   PetscFunctionBegin;
3210   /* this implements stabilized Gram-Schmidt */
3211   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3212   for (i=0;i<n;i++) {
3213     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3214     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3215     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3216   }
3217   ierr = PetscFree(alphas);CHKERRQ(ierr);
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatISGetSubassemblingPattern"
3223 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3224 {
3225   Mat             subdomain_adj;
3226   IS              new_ranks,ranks_send_to;
3227   MatPartitioning partitioner;
3228   Mat_IS          *matis;
3229   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3230   PetscInt        prank;
3231   PetscMPIInt     size,rank,color;
3232   PetscInt        *xadj,*adjncy,*oldranks;
3233   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3234   PetscInt        i,local_size,threshold=0;
3235   PetscErrorCode  ierr;
3236   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3237   PetscSubcomm    subcomm;
3238 
3239   PetscFunctionBegin;
3240   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3241   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3242   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3243 
3244   /* Get info on mapping */
3245   matis = (Mat_IS*)(mat->data);
3246   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3247   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3248 
3249   /* build local CSR graph of subdomains' connectivity */
3250   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3251   xadj[0] = 0;
3252   xadj[1] = PetscMax(n_neighs-1,0);
3253   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3254   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3255 
3256   if (threshold) {
3257     PetscInt xadj_count = 0;
3258     for (i=1;i<n_neighs;i++) {
3259       if (n_shared[i] > threshold) {
3260         adjncy[xadj_count] = neighs[i];
3261         adjncy_wgt[xadj_count] = n_shared[i];
3262         xadj_count++;
3263       }
3264     }
3265     xadj[1] = xadj_count;
3266   } else {
3267     if (xadj[1]) {
3268       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3269       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3270     }
3271   }
3272   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3273   if (use_square) {
3274     for (i=0;i<xadj[1];i++) {
3275       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3276     }
3277   }
3278   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3279 
3280   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3281 
3282   /*
3283     Restrict work on active processes only.
3284   */
3285   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3286   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3287   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3288   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3289   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3290   if (color) {
3291     ierr = PetscFree(xadj);CHKERRQ(ierr);
3292     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3293     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3294   } else {
3295     PetscInt coarsening_ratio;
3296     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3297     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3298     prank = rank;
3299     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3300     /*
3301     for (i=0;i<size;i++) {
3302       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3303     }
3304     */
3305     for (i=0;i<xadj[1];i++) {
3306       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3307     }
3308     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3309     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3310     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3311 
3312     /* Partition */
3313     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3314     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3315     if (use_vwgt) {
3316       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3317       v_wgt[0] = local_size;
3318       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3319     }
3320     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3321     coarsening_ratio = size/n_subdomains;
3322     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3323     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3324     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3325     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3326 
3327     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3328     if (contiguous) {
3329       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3330     } else {
3331       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3332     }
3333     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3334     /* clean up */
3335     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3336     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3337     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3338     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3339   }
3340   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3341 
3342   /* assemble parallel IS for sends */
3343   i = 1;
3344   if (color) i=0;
3345   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3346 
3347   /* get back IS */
3348   *is_sends = ranks_send_to;
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatISSubassemble"
3356 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3357 {
3358   Mat                    local_mat;
3359   Mat_IS                 *matis;
3360   IS                     is_sends_internal;
3361   PetscInt               rows,cols,new_local_rows;
3362   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3363   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3364   ISLocalToGlobalMapping l2gmap;
3365   PetscInt*              l2gmap_indices;
3366   const PetscInt*        is_indices;
3367   MatType                new_local_type;
3368   /* buffers */
3369   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3370   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3371   PetscInt               *recv_buffer_idxs_local;
3372   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3373   /* MPI */
3374   MPI_Comm               comm,comm_n;
3375   PetscSubcomm           subcomm;
3376   PetscMPIInt            n_sends,n_recvs,commsize;
3377   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3378   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3379   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3380   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3381   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3382   PetscErrorCode         ierr;
3383 
3384   PetscFunctionBegin;
3385   /* TODO: add missing checks */
3386   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3387   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3388   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3389   PetscValidLogicalCollectiveInt(mat,nis,7);
3390   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3391   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3392   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3393   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3394   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3395   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3396   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3397   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3398     PetscInt mrows,mcols,mnrows,mncols;
3399     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3400     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3401     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3402     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3403     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3404     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3405   }
3406   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3407   PetscValidLogicalCollectiveInt(mat,bs,0);
3408   /* prepare IS for sending if not provided */
3409   if (!is_sends) {
3410     PetscBool pcontig = PETSC_TRUE;
3411     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3412     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3413   } else {
3414     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3415     is_sends_internal = is_sends;
3416   }
3417 
3418   /* get pointer of MATIS data */
3419   matis = (Mat_IS*)mat->data;
3420 
3421   /* get comm */
3422   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3423 
3424   /* compute number of sends */
3425   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3426   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3427 
3428   /* compute number of receives */
3429   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3430   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3431   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3432   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3433   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3434   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3435   ierr = PetscFree(iflags);CHKERRQ(ierr);
3436 
3437   /* restrict comm if requested */
3438   subcomm = 0;
3439   destroy_mat = PETSC_FALSE;
3440   if (restrict_comm) {
3441     PetscMPIInt color,rank,subcommsize;
3442     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3443     color = 0;
3444     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3445     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3446     subcommsize = commsize - subcommsize;
3447     /* check if reuse has been requested */
3448     if (reuse == MAT_REUSE_MATRIX) {
3449       if (*mat_n) {
3450         PetscMPIInt subcommsize2;
3451         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3452         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3453         comm_n = PetscObjectComm((PetscObject)*mat_n);
3454       } else {
3455         comm_n = PETSC_COMM_SELF;
3456       }
3457     } else { /* MAT_INITIAL_MATRIX */
3458       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3459       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3460       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3461       comm_n = subcomm->comm;
3462     }
3463     /* flag to destroy *mat_n if not significative */
3464     if (color) destroy_mat = PETSC_TRUE;
3465   } else {
3466     comm_n = comm;
3467   }
3468 
3469   /* prepare send/receive buffers */
3470   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3471   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3472   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3473   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3474   if (nis) {
3475     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3476   }
3477 
3478   /* Get data from local matrices */
3479   if (!isdense) {
3480     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3481     /* TODO: See below some guidelines on how to prepare the local buffers */
3482     /*
3483        send_buffer_vals should contain the raw values of the local matrix
3484        send_buffer_idxs should contain:
3485        - MatType_PRIVATE type
3486        - PetscInt        size_of_l2gmap
3487        - PetscInt        global_row_indices[size_of_l2gmap]
3488        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3489     */
3490   } else {
3491     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3492     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3493     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3494     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3495     send_buffer_idxs[1] = i;
3496     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3497     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3498     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3499     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3500     for (i=0;i<n_sends;i++) {
3501       ilengths_vals[is_indices[i]] = len*len;
3502       ilengths_idxs[is_indices[i]] = len+2;
3503     }
3504   }
3505   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3506   /* additional is (if any) */
3507   if (nis) {
3508     PetscMPIInt psum;
3509     PetscInt j;
3510     for (j=0,psum=0;j<nis;j++) {
3511       PetscInt plen;
3512       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3513       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3514       psum += len+1; /* indices + lenght */
3515     }
3516     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3517     for (j=0,psum=0;j<nis;j++) {
3518       PetscInt plen;
3519       const PetscInt *is_array_idxs;
3520       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3521       send_buffer_idxs_is[psum] = plen;
3522       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3523       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3524       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3525       psum += plen+1; /* indices + lenght */
3526     }
3527     for (i=0;i<n_sends;i++) {
3528       ilengths_idxs_is[is_indices[i]] = psum;
3529     }
3530     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3531   }
3532 
3533   buf_size_idxs = 0;
3534   buf_size_vals = 0;
3535   buf_size_idxs_is = 0;
3536   for (i=0;i<n_recvs;i++) {
3537     buf_size_idxs += (PetscInt)olengths_idxs[i];
3538     buf_size_vals += (PetscInt)olengths_vals[i];
3539     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3540   }
3541   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3542   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3543   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3544 
3545   /* get new tags for clean communications */
3546   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3547   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3548   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3549 
3550   /* allocate for requests */
3551   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3552   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3553   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3554   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3555   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3556   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3557 
3558   /* communications */
3559   ptr_idxs = recv_buffer_idxs;
3560   ptr_vals = recv_buffer_vals;
3561   ptr_idxs_is = recv_buffer_idxs_is;
3562   for (i=0;i<n_recvs;i++) {
3563     source_dest = onodes[i];
3564     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3565     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3566     ptr_idxs += olengths_idxs[i];
3567     ptr_vals += olengths_vals[i];
3568     if (nis) {
3569       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3570       ptr_idxs_is += olengths_idxs_is[i];
3571     }
3572   }
3573   for (i=0;i<n_sends;i++) {
3574     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3575     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3576     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3577     if (nis) {
3578       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3579     }
3580   }
3581   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3582   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3583 
3584   /* assemble new l2g map */
3585   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3586   ptr_idxs = recv_buffer_idxs;
3587   new_local_rows = 0;
3588   for (i=0;i<n_recvs;i++) {
3589     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3590     ptr_idxs += olengths_idxs[i];
3591   }
3592   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3593   ptr_idxs = recv_buffer_idxs;
3594   new_local_rows = 0;
3595   for (i=0;i<n_recvs;i++) {
3596     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3597     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3598     ptr_idxs += olengths_idxs[i];
3599   }
3600   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3601   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3602   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3603 
3604   /* infer new local matrix type from received local matrices type */
3605   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3606   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3607   if (n_recvs) {
3608     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3609     ptr_idxs = recv_buffer_idxs;
3610     for (i=0;i<n_recvs;i++) {
3611       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3612         new_local_type_private = MATAIJ_PRIVATE;
3613         break;
3614       }
3615       ptr_idxs += olengths_idxs[i];
3616     }
3617     switch (new_local_type_private) {
3618       case MATDENSE_PRIVATE:
3619         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3620           new_local_type = MATSEQAIJ;
3621           bs = 1;
3622         } else { /* if I receive only 1 dense matrix */
3623           new_local_type = MATSEQDENSE;
3624           bs = 1;
3625         }
3626         break;
3627       case MATAIJ_PRIVATE:
3628         new_local_type = MATSEQAIJ;
3629         bs = 1;
3630         break;
3631       case MATBAIJ_PRIVATE:
3632         new_local_type = MATSEQBAIJ;
3633         break;
3634       case MATSBAIJ_PRIVATE:
3635         new_local_type = MATSEQSBAIJ;
3636         break;
3637       default:
3638         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3639         break;
3640     }
3641   } else { /* by default, new_local_type is seqdense */
3642     new_local_type = MATSEQDENSE;
3643     bs = 1;
3644   }
3645 
3646   /* create MATIS object if needed */
3647   if (reuse == MAT_INITIAL_MATRIX) {
3648     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3649     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3650   } else {
3651     /* it also destroys the local matrices */
3652     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3653   }
3654   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3655   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3656 
3657   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3658 
3659   /* Global to local map of received indices */
3660   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3661   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3662   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3663 
3664   /* restore attributes -> type of incoming data and its size */
3665   buf_size_idxs = 0;
3666   for (i=0;i<n_recvs;i++) {
3667     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3668     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3669     buf_size_idxs += (PetscInt)olengths_idxs[i];
3670   }
3671   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3672 
3673   /* set preallocation */
3674   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3675   if (!newisdense) {
3676     PetscInt *new_local_nnz=0;
3677 
3678     ptr_vals = recv_buffer_vals;
3679     ptr_idxs = recv_buffer_idxs_local;
3680     if (n_recvs) {
3681       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3682     }
3683     for (i=0;i<n_recvs;i++) {
3684       PetscInt j;
3685       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3686         for (j=0;j<*(ptr_idxs+1);j++) {
3687           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3688         }
3689       } else {
3690         /* TODO */
3691       }
3692       ptr_idxs += olengths_idxs[i];
3693     }
3694     if (new_local_nnz) {
3695       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3696       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3697       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3698       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3699       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3700       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3701     } else {
3702       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3703     }
3704     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3705   } else {
3706     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3707   }
3708 
3709   /* set values */
3710   ptr_vals = recv_buffer_vals;
3711   ptr_idxs = recv_buffer_idxs_local;
3712   for (i=0;i<n_recvs;i++) {
3713     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3714       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3715       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3716       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3717       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3718       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3719     } else {
3720       /* TODO */
3721     }
3722     ptr_idxs += olengths_idxs[i];
3723     ptr_vals += olengths_vals[i];
3724   }
3725   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3727   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3728   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3729   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3730   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3731 
3732 #if 0
3733   if (!restrict_comm) { /* check */
3734     Vec       lvec,rvec;
3735     PetscReal infty_error;
3736 
3737     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3738     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3739     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3740     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3741     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3742     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3743     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3744     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3745     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3746   }
3747 #endif
3748 
3749   /* assemble new additional is (if any) */
3750   if (nis) {
3751     PetscInt **temp_idxs,*count_is,j,psum;
3752 
3753     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3754     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3755     ptr_idxs = recv_buffer_idxs_is;
3756     psum = 0;
3757     for (i=0;i<n_recvs;i++) {
3758       for (j=0;j<nis;j++) {
3759         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3760         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3761         psum += plen;
3762         ptr_idxs += plen+1; /* shift pointer to received data */
3763       }
3764     }
3765     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3766     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3767     for (i=1;i<nis;i++) {
3768       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3769     }
3770     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3771     ptr_idxs = recv_buffer_idxs_is;
3772     for (i=0;i<n_recvs;i++) {
3773       for (j=0;j<nis;j++) {
3774         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3775         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3776         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3777         ptr_idxs += plen+1; /* shift pointer to received data */
3778       }
3779     }
3780     for (i=0;i<nis;i++) {
3781       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3782       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3783       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3784     }
3785     ierr = PetscFree(count_is);CHKERRQ(ierr);
3786     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3787     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3788   }
3789   /* free workspace */
3790   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3791   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3792   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3793   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3794   if (isdense) {
3795     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3796     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3797   } else {
3798     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3799   }
3800   if (nis) {
3801     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3802     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3803   }
3804   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3805   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3806   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3807   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3808   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3809   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3810   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3811   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3812   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3813   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3814   ierr = PetscFree(onodes);CHKERRQ(ierr);
3815   if (nis) {
3816     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3817     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3818     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3819   }
3820   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3821   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3822     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3823     for (i=0;i<nis;i++) {
3824       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3825     }
3826   }
3827   PetscFunctionReturn(0);
3828 }
3829 
3830 /* temporary hack into ksp private data structure */
3831 #include <petsc-private/kspimpl.h>
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3835 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3836 {
3837   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3838   PC_IS                  *pcis = (PC_IS*)pc->data;
3839   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3840   MatNullSpace           CoarseNullSpace=NULL;
3841   ISLocalToGlobalMapping coarse_islg;
3842   IS                     coarse_is,*isarray;
3843   PetscInt               i,im_active=-1,active_procs=-1;
3844   PetscInt               nis,nisdofs,nisneu;
3845   PC                     pc_temp;
3846   PCType                 coarse_pc_type;
3847   KSPType                coarse_ksp_type;
3848   PetscBool              multilevel_requested,multilevel_allowed;
3849   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3850   Mat                    t_coarse_mat_is;
3851   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3852   PetscMPIInt            all_procs;
3853   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3854   PetscBool              compute_vecs = PETSC_FALSE;
3855   PetscScalar            *array;
3856   PetscErrorCode         ierr;
3857 
3858   PetscFunctionBegin;
3859   /* Assign global numbering to coarse dofs */
3860   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3861     compute_vecs = PETSC_TRUE;
3862     PetscInt ocoarse_size;
3863     ocoarse_size = pcbddc->coarse_size;
3864     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3865     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3866     /* see if we can avoid some work */
3867     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3868       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3869         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3870         coarse_reuse = PETSC_FALSE;
3871       } else { /* we can safely reuse already computed coarse matrix */
3872         coarse_reuse = PETSC_TRUE;
3873       }
3874     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3875       coarse_reuse = PETSC_FALSE;
3876     }
3877     /* reset any subassembling information */
3878     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3879     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3880   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3881     coarse_reuse = PETSC_TRUE;
3882   }
3883 
3884   /* count "active" (i.e. with positive local size) and "void" processes */
3885   im_active = !!(pcis->n);
3886   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3887   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3888   void_procs = all_procs-active_procs;
3889   csin_type_simple = PETSC_TRUE;
3890   redist = PETSC_FALSE;
3891   if (pcbddc->current_level && void_procs) {
3892     csin_ml = PETSC_TRUE;
3893     ncoarse_ml = void_procs;
3894     csin_ds = PETSC_TRUE;
3895     ncoarse_ds = void_procs;
3896   } else {
3897     csin_ml = PETSC_FALSE;
3898     ncoarse_ml = all_procs;
3899     if (void_procs) {
3900       csin_ds = PETSC_TRUE;
3901       ncoarse_ds = void_procs;
3902       csin_type_simple = PETSC_FALSE;
3903     } else {
3904       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3905         csin_ds = PETSC_TRUE;
3906         ncoarse_ds = pcbddc->redistribute_coarse;
3907         redist = PETSC_TRUE;
3908       } else {
3909         csin_ds = PETSC_FALSE;
3910         ncoarse_ds = all_procs;
3911       }
3912     }
3913   }
3914 
3915   /*
3916     test if we can go multilevel: three conditions must be satisfied:
3917     - we have not exceeded the number of levels requested
3918     - we can actually subassemble the active processes
3919     - we can find a suitable number of MPI processes where we can place the subassembled problem
3920   */
3921   multilevel_allowed = PETSC_FALSE;
3922   multilevel_requested = PETSC_FALSE;
3923   if (pcbddc->current_level < pcbddc->max_levels) {
3924     multilevel_requested = PETSC_TRUE;
3925     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3926       multilevel_allowed = PETSC_FALSE;
3927     } else {
3928       multilevel_allowed = PETSC_TRUE;
3929     }
3930   }
3931   /* determine number of process partecipating to coarse solver */
3932   if (multilevel_allowed) {
3933     ncoarse = ncoarse_ml;
3934     csin = csin_ml;
3935   } else {
3936     ncoarse = ncoarse_ds;
3937     csin = csin_ds;
3938   }
3939 
3940   /* creates temporary l2gmap and IS for coarse indexes */
3941   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3942   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3943 
3944   /* creates temporary MATIS object for coarse matrix */
3945   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3946   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3947   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3948   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3949 #if 0
3950   {
3951     PetscViewer viewer;
3952     char filename[256];
3953     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3954     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3955     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3956     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3957     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3958   }
3959 #endif
3960   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3961   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3962   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3963   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3965 
3966   /* compute dofs splitting and neumann boundaries for coarse dofs */
3967   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3968     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3969     const PetscInt         *idxs;
3970     ISLocalToGlobalMapping tmap;
3971 
3972     /* create map between primal indices (in local representative ordering) and local primal numbering */
3973     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3974     /* allocate space for temporary storage */
3975     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3976     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3977     /* allocate for IS array */
3978     nisdofs = pcbddc->n_ISForDofsLocal;
3979     nisneu = !!pcbddc->NeumannBoundariesLocal;
3980     nis = nisdofs + nisneu;
3981     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3982     /* dofs splitting */
3983     for (i=0;i<nisdofs;i++) {
3984       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3985       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3986       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3987       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3988       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3989       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3990       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3991       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3992     }
3993     /* neumann boundaries */
3994     if (pcbddc->NeumannBoundariesLocal) {
3995       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3996       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3997       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3998       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3999       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4000       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4001       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4002       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4003     }
4004     /* free memory */
4005     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4006     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4007     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4008   } else {
4009     nis = 0;
4010     nisdofs = 0;
4011     nisneu = 0;
4012     isarray = NULL;
4013   }
4014   /* destroy no longer needed map */
4015   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4016 
4017   /* restrict on coarse candidates (if needed) */
4018   coarse_mat_is = NULL;
4019   if (csin) {
4020     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4021       if (redist) {
4022         PetscMPIInt rank;
4023         PetscInt spc,n_spc_p1,dest[1];
4024 
4025         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4026         spc = all_procs/pcbddc->redistribute_coarse;
4027         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4028         if (rank > n_spc_p1*(spc+1)-1) {
4029           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4030         } else {
4031           dest[0] = rank/(spc+1);
4032         }
4033         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4034       } else {
4035         PetscInt j,tissize,*nisindices;
4036         PetscInt *coarse_candidates;
4037         const PetscInt* tisindices;
4038         /* get coarse candidates' ranks in pc communicator */
4039         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4040         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4041         for (i=0,j=0;i<all_procs;i++) {
4042           if (!coarse_candidates[i]) {
4043             coarse_candidates[j]=i;
4044             j++;
4045           }
4046         }
4047         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4048         /* get a suitable subassembling pattern */
4049         if (csin_type_simple) {
4050           PetscMPIInt rank;
4051           PetscInt    issize,isidx;
4052           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4053           if (im_active) {
4054             issize = 1;
4055             isidx = (PetscInt)rank;
4056           } else {
4057             issize = 0;
4058             isidx = -1;
4059           }
4060           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4061         } else {
4062           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4063         }
4064         if (pcbddc->dbg_flag) {
4065           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4066           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4067           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4068           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4069           for (i=0;i<j;i++) {
4070             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4071           }
4072           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4073           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4074         }
4075         /* shift the pattern on coarse candidates */
4076         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4077         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4078         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4079         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4080         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4081         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4082         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4083       }
4084     }
4085     if (pcbddc->dbg_flag) {
4086       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4087       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4088       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4089       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4090     }
4091     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4092     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4093   } else {
4094     if (pcbddc->dbg_flag) {
4095       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4096       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4097       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4098     }
4099     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4100     coarse_mat_is = t_coarse_mat_is;
4101   }
4102 
4103   /* create local to global scatters for coarse problem */
4104   if (compute_vecs) {
4105     PetscInt lrows;
4106     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4107     if (coarse_mat_is) {
4108       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4109     } else {
4110       lrows = 0;
4111     }
4112     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4113     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4114     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4115     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4116     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4117   }
4118   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4119   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4120 
4121   /* set defaults for coarse KSP and PC */
4122   if (multilevel_allowed) {
4123     coarse_ksp_type = KSPRICHARDSON;
4124     coarse_pc_type = PCBDDC;
4125   } else {
4126     coarse_ksp_type = KSPPREONLY;
4127     coarse_pc_type = PCREDUNDANT;
4128   }
4129 
4130   /* print some info if requested */
4131   if (pcbddc->dbg_flag) {
4132     if (!multilevel_allowed) {
4133       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4134       if (multilevel_requested) {
4135         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4136       } else if (pcbddc->max_levels) {
4137         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4138       }
4139       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4140     }
4141   }
4142 
4143   /* create the coarse KSP object only once with defaults */
4144   if (coarse_mat_is) {
4145     MatReuse coarse_mat_reuse;
4146     PetscViewer dbg_viewer = NULL;
4147     if (pcbddc->dbg_flag) {
4148       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4149       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4150     }
4151     if (!pcbddc->coarse_ksp) {
4152       char prefix[256],str_level[16];
4153       size_t len;
4154       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4155       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4156       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4157       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4158       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4159       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4160       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4161       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4162       /* prefix */
4163       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4164       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4165       if (!pcbddc->current_level) {
4166         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4167         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4168       } else {
4169         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4170         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4171         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4172         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4173         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4174         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4175       }
4176       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4177       /* allow user customization */
4178       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4179       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4180     }
4181 
4182     /* get some info after set from options */
4183     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4184     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4185     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4186     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4187     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4188       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4189       isbddc = PETSC_FALSE;
4190     }
4191     if (isredundant) {
4192       KSP inner_ksp;
4193       PC inner_pc;
4194       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4195       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4196       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4197     }
4198 
4199     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4200     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4201     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4202     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4203     if (nisdofs) {
4204       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4205       for (i=0;i<nisdofs;i++) {
4206         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4207       }
4208     }
4209     if (nisneu) {
4210       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4211       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4212     }
4213 
4214     /* assemble coarse matrix */
4215     if (coarse_reuse) {
4216       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4217       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4218       coarse_mat_reuse = MAT_REUSE_MATRIX;
4219     } else {
4220       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4221     }
4222     if (isbddc || isnn) {
4223       if (pcbddc->coarsening_ratio > 1) {
4224         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4225           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4226           if (pcbddc->dbg_flag) {
4227             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4228             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4229             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4230             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4231           }
4232         }
4233         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4234       } else {
4235         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4236         coarse_mat = coarse_mat_is;
4237       }
4238     } else {
4239       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4240     }
4241     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4242 
4243     /* propagate symmetry info to coarse matrix */
4244     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4245     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4246 
4247     /* set operators */
4248     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4249     if (pcbddc->dbg_flag) {
4250       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4251     }
4252   } else { /* processes non partecipating to coarse solver (if any) */
4253     coarse_mat = 0;
4254   }
4255   ierr = PetscFree(isarray);CHKERRQ(ierr);
4256 #if 0
4257   {
4258     PetscViewer viewer;
4259     char filename[256];
4260     sprintf(filename,"coarse_mat.m");
4261     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4262     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4263     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4264     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4265   }
4266 #endif
4267 
4268   /* Compute coarse null space (special handling by BDDC only) */
4269   if (pcbddc->NullSpace) {
4270     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4271   }
4272 
4273   if (pcbddc->coarse_ksp) {
4274     Vec crhs,csol;
4275     PetscBool ispreonly;
4276     if (CoarseNullSpace) {
4277       if (isbddc) {
4278         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4279       } else {
4280         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4281       }
4282     }
4283     /* setup coarse ksp */
4284     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4285     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4286     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4287     /* hack */
4288     if (!csol) {
4289       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4290     }
4291     if (!crhs) {
4292       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4293     }
4294     /* Check coarse problem if in debug mode or if solving with an iterative method */
4295     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4296     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4297       KSP       check_ksp;
4298       KSPType   check_ksp_type;
4299       PC        check_pc;
4300       Vec       check_vec,coarse_vec;
4301       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4302       PetscInt  its;
4303       PetscBool compute_eigs;
4304       PetscReal *eigs_r,*eigs_c;
4305       PetscInt  neigs;
4306       const char *prefix;
4307 
4308       /* Create ksp object suitable for estimation of extreme eigenvalues */
4309       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4310       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4311       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4312       if (ispreonly) {
4313         check_ksp_type = KSPPREONLY;
4314         compute_eigs = PETSC_FALSE;
4315       } else {
4316         check_ksp_type = KSPGMRES;
4317         compute_eigs = PETSC_TRUE;
4318       }
4319       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4320       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4321       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4322       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4323       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4324       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4325       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4326       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4327       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4328       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4329       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4330       /* create random vec */
4331       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4332       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4333       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4334       if (CoarseNullSpace) {
4335         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4336       }
4337       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4338       /* solve coarse problem */
4339       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4340       if (CoarseNullSpace) {
4341         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4342       }
4343       /* set eigenvalue estimation if preonly has not been requested */
4344       if (compute_eigs) {
4345         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4346         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4347         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4348         lambda_max = eigs_r[neigs-1];
4349         lambda_min = eigs_r[0];
4350         if (pcbddc->use_coarse_estimates) {
4351           if (lambda_max>lambda_min) {
4352             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4353             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4354           }
4355         }
4356       }
4357 
4358       /* check coarse problem residual error */
4359       if (pcbddc->dbg_flag) {
4360         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4361         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4362         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4363         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4364         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4365         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4366         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4367         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4368         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4369         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4370         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4371         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4372         if (compute_eigs) {
4373           PetscReal lambda_max_s,lambda_min_s;
4374           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4375           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4376           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4377           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4378           for (i=0;i<neigs;i++) {
4379             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4380           }
4381         }
4382         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4383         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4384       }
4385       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4386       if (compute_eigs) {
4387         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4388         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4389       }
4390     }
4391   }
4392   /* print additional info */
4393   if (pcbddc->dbg_flag) {
4394     /* waits until all processes reaches this point */
4395     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4396     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4397     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4398   }
4399 
4400   /* free memory */
4401   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4402   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4403   PetscFunctionReturn(0);
4404 }
4405 
4406 #undef __FUNCT__
4407 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4408 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4409 {
4410   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4411   PC_IS*         pcis = (PC_IS*)pc->data;
4412   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4413   PetscInt       i,coarse_size;
4414   PetscInt       *local_primal_indices;
4415   PetscErrorCode ierr;
4416 
4417   PetscFunctionBegin;
4418   /* Compute global number of coarse dofs */
4419   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4420     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4421   }
4422   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4423 
4424   /* check numbering */
4425   if (pcbddc->dbg_flag) {
4426     PetscScalar coarsesum,*array;
4427     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4428 
4429     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4430     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4431     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4432     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4433     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4434     for (i=0;i<pcbddc->local_primal_size;i++) {
4435       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4436     }
4437     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4438     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4439     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4440     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4441     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4442     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4443     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4444     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4445     for (i=0;i<pcis->n;i++) {
4446       if (array[i] == 1.0) {
4447         set_error = PETSC_TRUE;
4448         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4449       }
4450     }
4451     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4453     for (i=0;i<pcis->n;i++) {
4454       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4455     }
4456     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4457     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4458     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4459     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4460     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4461     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4462     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4463       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4464       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4465       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4466       for (i=0;i<pcbddc->local_primal_size;i++) {
4467         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4468       }
4469       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4470     }
4471     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4472     if (set_error_reduced) {
4473       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4474     }
4475   }
4476   /* get back data */
4477   *coarse_size_n = coarse_size;
4478   *local_primal_indices_n = local_primal_indices;
4479   PetscFunctionReturn(0);
4480 }
4481 
4482 #undef __FUNCT__
4483 #define __FUNCT__ "PCBDDCGlobalToLocal"
4484 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4485 {
4486   IS             localis_t;
4487   PetscInt       i,lsize,*idxs,n;
4488   PetscScalar    *vals;
4489   PetscErrorCode ierr;
4490 
4491   PetscFunctionBegin;
4492   /* get indices in local ordering exploiting local to global map */
4493   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4494   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4495   for (i=0;i<lsize;i++) vals[i] = 1.0;
4496   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4497   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4498   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4499   if (idxs) { /* multilevel guard */
4500     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4501   }
4502   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4503   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4504   ierr = PetscFree(vals);CHKERRQ(ierr);
4505   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4506   /* now compute set in local ordering */
4507   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4508   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4509   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4510   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4511   for (i=0,lsize=0;i<n;i++) {
4512     if (PetscRealPart(vals[i]) > 0.5) {
4513       lsize++;
4514     }
4515   }
4516   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4517   for (i=0,lsize=0;i<n;i++) {
4518     if (PetscRealPart(vals[i]) > 0.5) {
4519       idxs[lsize++] = i;
4520     }
4521   }
4522   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4523   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4524   *localis = localis_t;
4525   PetscFunctionReturn(0);
4526 }
4527 
4528 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4529 #undef __FUNCT__
4530 #define __FUNCT__ "PCBDDCMatMult_Private"
4531 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4532 {
4533   PCBDDCChange_ctx change_ctx;
4534   PetscErrorCode   ierr;
4535 
4536   PetscFunctionBegin;
4537   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4538   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4539   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4540   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4541   PetscFunctionReturn(0);
4542 }
4543 
4544 #undef __FUNCT__
4545 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4546 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4547 {
4548   PCBDDCChange_ctx change_ctx;
4549   PetscErrorCode   ierr;
4550 
4551   PetscFunctionBegin;
4552   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4553   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4554   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4555   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4556   PetscFunctionReturn(0);
4557 }
4558 
4559 #undef __FUNCT__
4560 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4561 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4562 {
4563   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4564   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4565   PetscInt            *used_xadj,*used_adjncy;
4566   PetscBool           free_used_adj;
4567   PetscErrorCode      ierr;
4568 
4569   PetscFunctionBegin;
4570   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4571   free_used_adj = PETSC_FALSE;
4572   if (pcbddc->sub_schurs_layers == -1) {
4573     used_xadj = NULL;
4574     used_adjncy = NULL;
4575   } else {
4576     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4577       used_xadj = pcbddc->mat_graph->xadj;
4578       used_adjncy = pcbddc->mat_graph->adjncy;
4579     } else if (pcbddc->computed_rowadj) {
4580       used_xadj = pcbddc->mat_graph->xadj;
4581       used_adjncy = pcbddc->mat_graph->adjncy;
4582     } else {
4583       Mat            mat_adj;
4584       PetscBool      flg_row=PETSC_TRUE;
4585       const PetscInt *xadj,*adjncy;
4586       PetscInt       nvtxs;
4587 
4588       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4589       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4590       if (!flg_row) {
4591         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4592       }
4593       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4594       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4595       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4596       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4597       if (!flg_row) {
4598         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4599       }
4600       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4601       free_used_adj = PETSC_TRUE;
4602     }
4603   }
4604   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4605 
4606   /* free adjacency */
4607   if (free_used_adj) {
4608     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4609   }
4610   PetscFunctionReturn(0);
4611 }
4612 
4613 #undef __FUNCT__
4614 #define __FUNCT__ "PCBDDCInitSubSchurs"
4615 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4616 {
4617   PC_IS               *pcis=(PC_IS*)pc->data;
4618   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4619   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4620   PCBDDCGraph         graph;
4621   Mat                 S_j;
4622   PetscErrorCode      ierr;
4623 
4624   PetscFunctionBegin;
4625   /* attach interface graph for determining subsets */
4626   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4627     IS verticesIS;
4628 
4629     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4630     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4631     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4632     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4633     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4634     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4635 /*
4636     if (pcbddc->dbg_flag) {
4637       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4638     }
4639 */
4640   } else {
4641     graph = pcbddc->mat_graph;
4642   }
4643 
4644   /* Create Schur complement matrix */
4645   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4646   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4647 
4648   /* sub_schurs init */
4649   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4650   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4651   /* free graph struct */
4652   if (pcbddc->sub_schurs_rebuild) {
4653     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4654   }
4655   PetscFunctionReturn(0);
4656 }
4657