xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 03af80d8eacb4669ad82e6c0b8f6795c0ee1ec04)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       cum += subset_size*n;
66       cum2 += n;
67       maxneigs = PetscMax(maxneigs,n);
68     }
69   }
70 
71   if (mss) {
72     if (symmetric) {
73       PetscBLASInt B_itype = 1;
74       PetscBLASInt B_N = mss;
75       PetscScalar  zero = 0.0;
76       PetscScalar  eps = 0.0; /* dlamch? */
77 
78       B_lwork = -1;
79       S = NULL;
80       St = NULL;
81       eigs = NULL;
82       eigv = NULL;
83       B_iwork = NULL;
84       B_ifail = NULL;
85       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
86 #if defined(PETSC_USE_COMPLEX)
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
88 #else
89       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
90 #endif
91       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
92       ierr = PetscFPTrapPop();CHKERRQ(ierr);
93     } else {
94         /* TODO */
95     }
96   } else {
97     lwork = 0;
98   }
99 
100   nv = 0;
101   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
102     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
103   }
104   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
105   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
106                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
107 #if defined(PETSC_USE_COMPLEX)
108   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
109 #endif
110   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
111   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
112                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
113                       nv+cum,&pcbddc->adaptive_constraints_idxs,
114                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
115   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
116 
117   maxneigs = 0;
118   cum = cum2 = cumarray = 0;
119   if (sub_schurs->is_Ej_com) {
120     const PetscInt *idxs;
121 
122     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
123     for (cum=0;cum<nv;cum++) {
124       pcbddc->adaptive_constraints_n[cum] = 1;
125       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
126       pcbddc->adaptive_constraints_ptrs[cum] = cum;
127       pcbddc->adaptive_constraints_data[cum] = 1.0;
128     }
129     cum2 = cum;
130     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
131   }
132 
133   if (mss) { /* multilevel */
134     if (pcbddc->use_deluxe_scaling) {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
136     } else {
137       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
138     }
139     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
140   }
141 
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt j,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
147       const PetscInt *idxs;
148       PetscScalar    one = 1.0,zero=0.0;
149       PetscBLASInt   B_N;
150 
151       /* S should be copied since we need it for deluxe scaling */
152       if (symmetric) {
153         PetscInt j;
154         for (j=0;j<subset_size;j++) {
155           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
156         }
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160       } else {
161         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163       }
164       /* is always this the right matrix? */
165       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166 
167       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
168       /* St = Starray+cumarray; */
169 
170       if (symmetric) {
171         PetscBLASInt B_itype = 1;
172         PetscBLASInt B_IL = 1, B_IU;
173         PetscScalar  eps = -1.0; /* dlamch? */
174         PetscInt     nmin_s;
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         nmin_s = PetscMin(nmin,B_N);
202         if (B_neigs < nmin_s) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
251 
252       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
253       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
254       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255 #if 0
256       PetscInt ii;
257       for (ii=0;ii<B_neigs;ii++) {
258         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
259         for (j=0;j<B_N;j++) {
260           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
261         }
262       }
263 #endif
264       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
265       for (j=0;j<B_neigs;j++) {
266 #if 0
267         {
268           PetscBLASInt Blas_N,Blas_one = 1.0;
269           PetscScalar norm;
270           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
272           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
273             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
274           } else {
275             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
276           }
277           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
278         }
279 #endif
280         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
281         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
282         cum2 += subset_size;
283       }
284       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285     }
286     /* shift for next computation */
287     cumarray += subset_size*subset_size;
288   }
289   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
290   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
291 
292   if (mss) {
293     if (pcbddc->use_deluxe_scaling) {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
295     } else {
296       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
297     }
298     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
299   }
300   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
301 #if defined(PETSC_USE_COMPLEX)
302   ierr = PetscFree(rwork);CHKERRQ(ierr);
303 #endif
304   if (pcbddc->dbg_flag) {
305     PetscInt maxneigs_r;
306     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
307     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "PCBDDCSetUpSolvers"
314 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
315 {
316   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
317   PetscScalar    *coarse_submat_vals;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   /* Setup local scatters R_to_B and (optionally) R_to_D */
322   /* PCBDDCSetUpLocalWorkVectors should be called first! */
323   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
324 
325   /* Setup local neumann solver ksp_R */
326   /* PCBDDCSetUpLocalScatters should be called first! */
327   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
328 
329   /* Change global null space passed in by the user if change of basis has been requested */
330   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
331     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
332   }
333 
334   /*
335      Setup local correction and local part of coarse basis.
336      Gives back the dense local part of the coarse matrix in column major ordering
337   */
338   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
339 
340   /* Compute total number of coarse nodes and setup coarse solver */
341   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
342 
343   /* free */
344   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "PCBDDCResetCustomization"
350 PetscErrorCode PCBDDCResetCustomization(PC pc)
351 {
352   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
358   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
361   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
362   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
363   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
364   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
366   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetTopography"
372 PetscErrorCode PCBDDCResetTopography(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
380   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
381   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
382   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "PCBDDCResetSolvers"
388 PetscErrorCode PCBDDCResetSolvers(PC pc)
389 {
390   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
391   PetscScalar    *array;
392   PetscErrorCode ierr;
393 
394   PetscFunctionBegin;
395   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
396   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
397   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
398   ierr = PetscFree(array);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651   /* Get submatrices from subdomain matrix */
652   if (n_vertices) {
653     Mat       newmat;
654     IS        is_aux;
655     PetscInt  ibs,mbs;
656     PetscBool issbaij;
657 
658     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
659     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
660     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
661     if (ibs != mbs) { /* need to convert to SEQAIJ */
662       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
663       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
666       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667     } else {
668       /* this is safe */
669       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
670       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
671       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
672         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
673         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
674         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
675         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
676         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739 
740     if (n_R) {
741       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
742       PetscBLASInt B_N,B_one = 1;
743       PetscScalar  *x,*y;
744 
745       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
746       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
747       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
748       if (F) {
749         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
750       } else {
751         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
752         for (i=0;i<n_vertices;i++) {
753           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
754           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
755           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
756           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
757           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
758         }
759         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
760       }
761       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
762       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
763       if (n_constraints) {
764         Mat B;
765         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
766         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
767         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
768         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
769         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
770         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
771         ierr = MatDestroy(&B);CHKERRQ(ierr);
772       }
773       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
774       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
775       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
776       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
778       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
779       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
780       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
781       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
782       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
783       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
784       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
785     } else {
786       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
787       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
788     }
789     /* coarse basis functions */
790     for (i=0;i<n_vertices;i++) {
791       PetscScalar *y;
792 
793       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
794       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
795       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
796       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798       y[n_B*i+idx_V_B[i]] = 1.0;
799       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
800       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
801 
802       if (pcbddc->switch_static || pcbddc->dbg_flag) {
803         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
804         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
805         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
808         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
809       }
810       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
811     }
812     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
813     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
814   }
815 
816   if (n_constraints) {
817     Mat B;
818 
819     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
820     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
821     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
822     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
823     if (n_vertices) {
824       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
825     }
826     ierr = MatDestroy(&B);CHKERRQ(ierr);
827     /* coarse basis functions */
828     for (i=0;i<n_constraints;i++) {
829       PetscScalar *y;
830 
831       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
832       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
833       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
834       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
837       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
838 
839       if (pcbddc->switch_static || pcbddc->dbg_flag) {
840         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
841         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
842         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
845         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
846       }
847       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
848     }
849   }
850 
851   /* compute other basis functions for non-symmetric problems */
852   if (!pcbddc->issym) {
853     Mat B,X;
854 
855     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
856 
857     if (n_constraints) {
858       Mat S_CCT,B_C;
859 
860       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
861       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
862       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
863       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
864       if (n_vertices) {
865         Mat B_V,S_VCT;
866 
867         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
868         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
869         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
870         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
871         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
872       }
873       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
874     }
875     if (n_vertices && n_R) {
876       Mat          A_VRT;
877       PetscBLASInt B_N,B_one = 1;
878 
879       if (!n_constraints) { /* if there are no constraints, reset work */
880         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
881       }
882       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
883       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
884       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
885       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
886       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
887     }
888 
889     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
890     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
891       for (i=0;i<pcbddc->local_primal_size;i++) {
892         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
893         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
894         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
895         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
896         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
897       }
898     } else {
899       for (i=0;i<pcbddc->local_primal_size;i++) {
900         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
902         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
903         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
904         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
905       }
906     }
907     ierr = MatDestroy(&B);CHKERRQ(ierr);
908     /* coarse basis functions */
909     for (i=0;i<pcbddc->local_primal_size;i++) {
910       PetscScalar *y;
911 
912       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
913       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
914       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
915       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917       if (i<n_vertices) {
918         y[n_B*i+idx_V_B[i]] = 1.0;
919       }
920       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
921       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
922 
923       if (pcbddc->switch_static || pcbddc->dbg_flag) {
924         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
925         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
926         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
929         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
930       }
931       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
932     }
933     ierr = MatDestroy(&X);CHKERRQ(ierr);
934   }
935 
936   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
937   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
938   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
939   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
940   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
941   /* Checking coarse_sub_mat and coarse basis functios */
942   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
943   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
944   if (pcbddc->dbg_flag) {
945     Mat         coarse_sub_mat;
946     Mat         AUXMAT,TM1,TM2,TM3,TM4;
947     Mat         coarse_phi_D,coarse_phi_B;
948     Mat         coarse_psi_D,coarse_psi_B;
949     Mat         A_II,A_BB,A_IB,A_BI;
950     MatType     checkmattype=MATSEQAIJ;
951     PetscReal   real_value;
952 
953     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
954     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
955     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
956     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
957     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
958     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
959     if (unsymmetric_check) {
960       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
961       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
962     }
963     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
964 
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     if (unsymmetric_check) {
969       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
970       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
971       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
972       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
973       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
974       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
975       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
976       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
977       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
978       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
979       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
980       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
981     } else {
982       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
983       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
984       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
985       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
986       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
987       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
988       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
989       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
990     }
991     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
992     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
993     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
994     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
995     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
996     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
997 
998     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
999     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1000     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1002     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1003     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1004     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1005     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1006     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1007     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1008     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1009     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1010     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1011     if (unsymmetric_check) {
1012       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1013       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1014     }
1015     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1016   }
1017 
1018   /* free memory */
1019   ierr = PetscFree(work);CHKERRQ(ierr);
1020   if (n_vertices) {
1021     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1022   }
1023   if (n_constraints) {
1024     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1025   }
1026   /* get back data */
1027   *coarse_submat_vals_n = coarse_submat_vals;
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1033 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1034 {
1035   Mat            *work_mat;
1036   IS             isrow_s,iscol_s;
1037   PetscBool      rsorted,csorted;
1038   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1043   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1044   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1046 
1047   if (!rsorted) {
1048     const PetscInt *idxs;
1049     PetscInt *idxs_sorted,i;
1050 
1051     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1052     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1053     for (i=0;i<rsize;i++) {
1054       idxs_perm_r[i] = i;
1055     }
1056     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1057     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1058     for (i=0;i<rsize;i++) {
1059       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1060     }
1061     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1062     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1063   } else {
1064     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1065     isrow_s = isrow;
1066   }
1067 
1068   if (!csorted) {
1069     if (isrow == iscol) {
1070       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1071       iscol_s = isrow_s;
1072     } else {
1073       const PetscInt *idxs;
1074       PetscInt *idxs_sorted,i;
1075 
1076       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1077       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1078       for (i=0;i<csize;i++) {
1079         idxs_perm_c[i] = i;
1080       }
1081       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1082       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1083       for (i=0;i<csize;i++) {
1084         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1085       }
1086       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1087       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1088     }
1089   } else {
1090     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1091     iscol_s = iscol;
1092   }
1093 
1094   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1095 
1096   if (!rsorted || !csorted) {
1097     Mat      new_mat;
1098     IS       is_perm_r,is_perm_c;
1099 
1100     if (!rsorted) {
1101       PetscInt *idxs_r,i;
1102       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1103       for (i=0;i<rsize;i++) {
1104         idxs_r[idxs_perm_r[i]] = i;
1105       }
1106       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1107       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1108     } else {
1109       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1110     }
1111     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1112 
1113     if (!csorted) {
1114       if (isrow_s == iscol_s) {
1115         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1116         is_perm_c = is_perm_r;
1117       } else {
1118         PetscInt *idxs_c,i;
1119         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1120         for (i=0;i<csize;i++) {
1121           idxs_c[idxs_perm_c[i]] = i;
1122         }
1123         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1124         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1125       }
1126     } else {
1127       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1128     }
1129     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1130 
1131     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1132     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1133     work_mat[0] = new_mat;
1134     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1135     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1136   }
1137 
1138   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1139   *B = work_mat[0];
1140   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1142   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1148 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1149 {
1150   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1151   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1152   Mat            new_mat;
1153   IS             is_local,is_global;
1154   PetscInt       local_size;
1155   PetscBool      isseqaij;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1160   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1161   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1162   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1163   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1164   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1165   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1166 
1167   /* check */
1168   if (pcbddc->dbg_flag) {
1169     Vec       x,x_change;
1170     PetscReal error;
1171 
1172     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1173     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1174     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1175     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1176     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1178     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1180     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1181     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1182     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1183     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1184     ierr = VecDestroy(&x);CHKERRQ(ierr);
1185     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1186   }
1187 
1188   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1189   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1190   if (isseqaij) {
1191     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1192   } else {
1193     Mat work_mat;
1194     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1195     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1196     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1197   }
1198   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1199   /*
1200   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1201   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1202   */
1203   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1209 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1210 {
1211   PC_IS*         pcis = (PC_IS*)(pc->data);
1212   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1213   IS             is_aux1,is_aux2;
1214   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1215   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1216   PetscInt       vbs,bs;
1217   PetscBT        bitmask;
1218   PetscErrorCode ierr;
1219 
1220   PetscFunctionBegin;
1221   /*
1222     No need to setup local scatters if
1223       - primal space is unchanged
1224         AND
1225       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1226         AND
1227       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1228   */
1229   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1230     PetscFunctionReturn(0);
1231   }
1232   /* destroy old objects */
1233   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1234   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1235   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1236   /* Set Non-overlapping dimensions */
1237   n_B = pcis->n_B; n_D = pcis->n - n_B;
1238   n_vertices = pcbddc->n_actual_vertices;
1239   /* create auxiliary bitmask */
1240   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1241   for (i=0;i<n_vertices;i++) {
1242     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1243   }
1244 
1245   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1246   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1247   for (i=0, n_R=0; i<pcis->n; i++) {
1248     if (!PetscBTLookup(bitmask,i)) {
1249       idx_R_local[n_R] = i;
1250       n_R++;
1251     }
1252   }
1253 
1254   /* Block code */
1255   vbs = 1;
1256   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1257   if (bs>1 && !(n_vertices%bs)) {
1258     PetscBool is_blocked = PETSC_TRUE;
1259     PetscInt  *vary;
1260     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1261     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1262     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1263     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1264     for (i=0; i<n_vertices; i++) {
1265       if (vary[i]!=0 && vary[i]!=bs) {
1266         is_blocked = PETSC_FALSE;
1267         break;
1268       }
1269     }
1270     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1271       vbs = bs;
1272       for (i=0;i<n_R/vbs;i++) {
1273         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1274       }
1275     }
1276     ierr = PetscFree(vary);CHKERRQ(ierr);
1277   }
1278   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1279   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1280 
1281   /* print some info if requested */
1282   if (pcbddc->dbg_flag) {
1283     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1286     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1287     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1288     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1289     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1290     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1291   }
1292 
1293   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1294   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1295   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1296   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0; i<n_D; i++) {
1299     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1300   }
1301   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1302   for (i=0, j=0; i<n_R; i++) {
1303     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1304       aux_array1[j++] = i;
1305     }
1306   }
1307   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1308   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1309   for (i=0, j=0; i<n_B; i++) {
1310     if (!PetscBTLookup(bitmask,is_indices[i])) {
1311       aux_array2[j++] = i;
1312     }
1313   }
1314   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1316   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1317   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1318   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1319 
1320   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1321     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1322     for (i=0, j=0; i<n_R; i++) {
1323       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1324         aux_array1[j++] = i;
1325       }
1326     }
1327     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1328     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1329     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1330   }
1331   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1332   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1339 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1340 {
1341   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1342   PC_IS          *pcis = (PC_IS*)pc->data;
1343   PC             pc_temp;
1344   Mat            A_RR;
1345   MatReuse       reuse;
1346   PetscScalar    m_one = -1.0;
1347   PetscReal      value;
1348   PetscInt       n_D,n_R,ibs,mbs;
1349   PetscBool      use_exact,use_exact_reduced,issbaij;
1350   PetscErrorCode ierr;
1351   /* prefixes stuff */
1352   char           dir_prefix[256],neu_prefix[256],str_level[16];
1353   size_t         len;
1354 
1355   PetscFunctionBegin;
1356 
1357   /* compute prefixes */
1358   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1359   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1360   if (!pcbddc->current_level) {
1361     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1362     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365   } else {
1366     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1367     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1368     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1369     len -= 15; /* remove "pc_bddc_coarse_" */
1370     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1371     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1372     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1373     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1374     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1375     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1376     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1377     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1378   }
1379 
1380   /* DIRICHLET PROBLEM */
1381   if (dirichlet) {
1382     /* Matrix for Dirichlet problem is pcis->A_II */
1383     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1384     if (!pcbddc->ksp_D) { /* create object if not yet build */
1385       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1386       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1387       /* default */
1388       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1389       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1390       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1391       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1392       if (issbaij) {
1393         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1394       } else {
1395         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1396       }
1397       /* Allow user's customization */
1398       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1399       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1400     }
1401     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1402     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1403     if (!n_D) {
1404       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1405       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1406     }
1407     /* Set Up KSP for Dirichlet problem of BDDC */
1408     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1409     /* set ksp_D into pcis data */
1410     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1411     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1412     pcis->ksp_D = pcbddc->ksp_D;
1413   }
1414 
1415   /* NEUMANN PROBLEM */
1416   A_RR = 0;
1417   if (neumann) {
1418     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1419     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1420     if (pcbddc->ksp_R) { /* already created ksp */
1421       PetscInt nn_R;
1422       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1423       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1424       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1425       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1426         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1427         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1428         reuse = MAT_INITIAL_MATRIX;
1429       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1430         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1431           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1432           reuse = MAT_INITIAL_MATRIX;
1433         } else { /* safe to reuse the matrix */
1434           reuse = MAT_REUSE_MATRIX;
1435         }
1436       }
1437       /* last check */
1438       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1439         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1440         reuse = MAT_INITIAL_MATRIX;
1441       }
1442     } else { /* first time, so we need to create the matrix */
1443       reuse = MAT_INITIAL_MATRIX;
1444     }
1445     /* extract A_RR */
1446     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1447     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1448     if (ibs != mbs) {
1449       Mat newmat;
1450       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1451       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1452       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1453     } else {
1454       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1455     }
1456     if (!pcbddc->ksp_R) { /* create object if not present */
1457       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1458       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1459       /* default */
1460       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1461       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1462       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1463       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1464       if (issbaij) {
1465         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1466       } else {
1467         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1468       }
1469       /* Allow user's customization */
1470       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1471       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1472     }
1473     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1474     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1475     if (!n_R) {
1476       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1477       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1478     }
1479     /* Set Up KSP for Neumann problem of BDDC */
1480     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1481   }
1482 
1483   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1484   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1485     if (pcbddc->dbg_flag) {
1486       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1487       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1488       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1489     }
1490     if (dirichlet) { /* Dirichlet */
1491       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1492       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1493       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1494       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1495       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1496       /* need to be adapted? */
1497       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1498       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1499       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1500       /* print info */
1501       if (pcbddc->dbg_flag) {
1502         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1503         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1504       }
1505       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1506         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1507       }
1508     }
1509     if (neumann) { /* Neumann */
1510       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1511       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1512       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1513       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1514       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1515       /* need to be adapted? */
1516       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1517       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1518       /* print info */
1519       if (pcbddc->dbg_flag) {
1520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1521         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1522       }
1523       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1524         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1525       }
1526     }
1527   }
1528   /* free Neumann problem's matrix */
1529   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1535 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1536 {
1537   PetscErrorCode ierr;
1538   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1539 
1540   PetscFunctionBegin;
1541   if (applytranspose) {
1542     if (pcbddc->local_auxmat1) {
1543       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1544       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1545     }
1546     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1547   } else {
1548     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1549     if (pcbddc->local_auxmat1) {
1550       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1551       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1552     }
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1558 #undef __FUNCT__
1559 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1560 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1561 {
1562   PetscErrorCode ierr;
1563   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1564   PC_IS*            pcis = (PC_IS*)  (pc->data);
1565   const PetscScalar zero = 0.0;
1566 
1567   PetscFunctionBegin;
1568   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1569   if (applytranspose) {
1570     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1571     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1572   } else {
1573     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1574     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1575   }
1576   /* start communications from local primal nodes to rhs of coarse solver */
1577   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1578   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580 
1581   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1582   /* TODO remove null space when doing multilevel */
1583   if (pcbddc->coarse_ksp) {
1584     if (applytranspose) {
1585       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1586     } else {
1587       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1588     }
1589   }
1590 
1591   /* Local solution on R nodes */
1592   if (pcis->n) {
1593     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1594     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1595     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) {
1597       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1598       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1599     }
1600     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1601     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1602     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1603     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1604     if (pcbddc->switch_static) {
1605       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1606       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1607     }
1608   }
1609 
1610   /* communications from coarse sol to local primal nodes */
1611   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613 
1614   /* Sum contributions from two levels */
1615   if (applytranspose) {
1616     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1617     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1618   } else {
1619     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1620     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1626 #undef __FUNCT__
1627 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1628 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1629 {
1630   PetscErrorCode ierr;
1631   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1632   PetscScalar    *array,*array2;
1633   Vec            from,to;
1634 
1635   PetscFunctionBegin;
1636   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1637     from = pcbddc->coarse_vec;
1638     to = pcbddc->vec1_P;
1639     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1640       Vec tvec;
1641       PetscInt lsize;
1642       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1643       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1644       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1645       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1646       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1647       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1648       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1649     }
1650   } else { /* from local to global -> put data in coarse right hand side */
1651     from = pcbddc->vec1_P;
1652     to = pcbddc->coarse_vec;
1653   }
1654   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1660 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1661 {
1662   PetscErrorCode ierr;
1663   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1664   PetscScalar    *array,*array2;
1665   Vec            from,to;
1666 
1667   PetscFunctionBegin;
1668   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1669     from = pcbddc->coarse_vec;
1670     to = pcbddc->vec1_P;
1671   } else { /* from local to global -> put data in coarse right hand side */
1672     from = pcbddc->vec1_P;
1673     to = pcbddc->coarse_vec;
1674   }
1675   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1676   if (smode == SCATTER_FORWARD) {
1677     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1678       Vec tvec;
1679       PetscInt lsize;
1680       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1681       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1682       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1683       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1684       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1685       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1686       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1687     }
1688   }
1689   PetscFunctionReturn(0);
1690 }
1691 
1692 /* uncomment for testing purposes */
1693 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1694 #undef __FUNCT__
1695 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1696 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1697 {
1698   PetscErrorCode    ierr;
1699   PC_IS*            pcis = (PC_IS*)(pc->data);
1700   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1701   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1702   /* one and zero */
1703   PetscScalar       one=1.0,zero=0.0;
1704   /* space to store constraints and their local indices */
1705   PetscScalar       *temp_quadrature_constraint;
1706   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1707   /* iterators */
1708   PetscInt          i,j,k,total_counts,temp_start_ptr;
1709   /* stuff to store connected components stored in pcbddc->mat_graph */
1710   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1711   PetscInt          n_ISForFaces,n_ISForEdges;
1712   /* near null space stuff */
1713   MatNullSpace      nearnullsp;
1714   const Vec         *nearnullvecs;
1715   Vec               *localnearnullsp;
1716   PetscBool         nnsp_has_cnst;
1717   PetscInt          nnsp_size;
1718   PetscScalar       *array;
1719   /* BLAS integers */
1720   PetscBLASInt      lwork,lierr;
1721   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1722   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1723   /* LAPACK working arrays for SVD or POD */
1724   PetscBool         skip_lapack;
1725   PetscScalar       *work;
1726   PetscReal         *singular_vals;
1727 #if defined(PETSC_USE_COMPLEX)
1728   PetscReal         *rwork;
1729 #endif
1730 #if defined(PETSC_MISSING_LAPACK_GESVD)
1731   PetscBLASInt      Blas_one_2=1;
1732   PetscScalar       *temp_basis,*correlation_mat;
1733 #else
1734   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1735   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1736 #endif
1737   /* reuse */
1738   PetscInt          olocal_primal_size;
1739   PetscInt          *oprimal_indices_local_idxs;
1740   /* change of basis */
1741   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1742   PetscBool         boolforchange,qr_needed;
1743   PetscBT           touched,change_basis,qr_needed_idx;
1744   /* auxiliary stuff */
1745   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1746   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1747   PetscScalar       *temp_quadrature_constraint_work=NULL;
1748   /* some quantities */
1749   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1750   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1751 
1752 
1753   PetscFunctionBegin;
1754   /* Destroy Mat objects computed previously */
1755   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1756   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1757   /* TODO synch with adaptive selection */
1758   /* Get index sets for faces, edges and vertices from graph */
1759   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1760   /* free unneeded index sets */
1761   if (!pcbddc->use_vertices) {
1762     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1763   }
1764   if (!pcbddc->use_edges) {
1765     for (i=0;i<n_ISForEdges;i++) {
1766       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1767     }
1768     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1769     n_ISForEdges = 0;
1770   }
1771   if (!pcbddc->use_faces) {
1772     for (i=0;i<n_ISForFaces;i++) {
1773       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1774     }
1775     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1776     n_ISForFaces = 0;
1777   }
1778   /* HACKS (the following two blocks of code) */
1779   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1780     pcbddc->use_change_of_basis = PETSC_TRUE;
1781     if (!ISForEdges) {
1782       pcbddc->use_change_on_faces = PETSC_TRUE;
1783     }
1784   }
1785   if (pcbddc->NullSpace) {
1786     /* use_change_of_basis should be consistent among processors */
1787     PetscBool tbool[2],gbool[2];
1788     tbool [0] = pcbddc->use_change_of_basis;
1789     tbool [1] = pcbddc->use_change_on_faces;
1790     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1791     pcbddc->use_change_of_basis = gbool[0];
1792     pcbddc->use_change_on_faces = gbool[1];
1793   }
1794   /* print some info */
1795   if (pcbddc->dbg_flag) {
1796     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1798     i = 0;
1799     if (ISForVertices) {
1800       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1801     }
1802     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1805     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1806   }
1807 
1808   if (!pcbddc->adaptive_selection) {
1809     /* check if near null space is attached to global mat */
1810     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1811     if (nearnullsp) {
1812       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1813       /* remove any stored info */
1814       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1815       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1816       /* store information for BDDC solver reuse */
1817       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1818       pcbddc->onearnullspace = nearnullsp;
1819       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1820       for (i=0;i<nnsp_size;i++) {
1821         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1822       }
1823     } else { /* if near null space is not provided BDDC uses constants by default */
1824       nnsp_size = 0;
1825       nnsp_has_cnst = PETSC_TRUE;
1826     }
1827     /* get max number of constraints on a single cc */
1828     max_constraints = nnsp_size;
1829     if (nnsp_has_cnst) max_constraints++;
1830 
1831     /*
1832          Evaluate maximum storage size needed by the procedure
1833          - temp_indices will contain start index of each constraint stored as follows
1834          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1835          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1836          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1837                                                                                                                                                            */
1838     total_counts = n_ISForFaces+n_ISForEdges;
1839     total_counts *= max_constraints;
1840     n_vertices = 0;
1841     if (ISForVertices) {
1842       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1843     }
1844     total_counts += n_vertices;
1845     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1846     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1847     total_counts = 0;
1848     max_size_of_constraint = 0;
1849     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1850       if (i<n_ISForEdges) {
1851         used_IS = &ISForEdges[i];
1852       } else {
1853         used_IS = &ISForFaces[i-n_ISForEdges];
1854       }
1855       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1856       total_counts += j;
1857       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1858     }
1859     total_counts *= max_constraints;
1860     total_counts += n_vertices;
1861     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1862     /* get local part of global near null space vectors */
1863     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1864     for (k=0;k<nnsp_size;k++) {
1865       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1866       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1868     }
1869 
1870     /* whether or not to skip lapack calls */
1871     skip_lapack = PETSC_TRUE;
1872     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1873 
1874     /* allocate some auxiliary stuff */
1875     if (!skip_lapack || pcbddc->use_qr_single) {
1876       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1877     }
1878 
1879     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1880     if (!skip_lapack) {
1881       PetscScalar temp_work;
1882 
1883 #if defined(PETSC_MISSING_LAPACK_GESVD)
1884       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1885       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1886       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1887       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1888 #if defined(PETSC_USE_COMPLEX)
1889       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1890 #endif
1891       /* now we evaluate the optimal workspace using query with lwork=-1 */
1892       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1893       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1894       lwork = -1;
1895       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1896 #if !defined(PETSC_USE_COMPLEX)
1897       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1898 #else
1899       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1900 #endif
1901       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1902       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1903 #else /* on missing GESVD */
1904       /* SVD */
1905       PetscInt max_n,min_n;
1906       max_n = max_size_of_constraint;
1907       min_n = max_constraints;
1908       if (max_size_of_constraint < max_constraints) {
1909         min_n = max_size_of_constraint;
1910         max_n = max_constraints;
1911       }
1912       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1913 #if defined(PETSC_USE_COMPLEX)
1914       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1915 #endif
1916       /* now we evaluate the optimal workspace using query with lwork=-1 */
1917       lwork = -1;
1918       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1919       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1920       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1921       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1922 #if !defined(PETSC_USE_COMPLEX)
1923       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1924 #else
1925       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1926 #endif
1927       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1928       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1929 #endif /* on missing GESVD */
1930       /* Allocate optimal workspace */
1931       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1932       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1933     }
1934     /* Now we can loop on constraining sets */
1935     total_counts = 0;
1936     temp_indices[0] = 0;
1937     /* vertices */
1938     if (ISForVertices) {
1939       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1940       if (nnsp_has_cnst) { /* consider all vertices */
1941         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1942         for (i=0;i<n_vertices;i++) {
1943           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1944           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1945           total_counts++;
1946         }
1947       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1948         PetscBool used_vertex;
1949         for (i=0;i<n_vertices;i++) {
1950           used_vertex = PETSC_FALSE;
1951           k = 0;
1952           while (!used_vertex && k<nnsp_size) {
1953             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1954             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1955               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1956               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1957               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1958               total_counts++;
1959               used_vertex = PETSC_TRUE;
1960             }
1961             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1962             k++;
1963           }
1964         }
1965       }
1966       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1967       n_vertices = total_counts;
1968     }
1969 
1970     /* edges and faces */
1971     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1972       if (ncc<n_ISForEdges) {
1973         used_IS = &ISForEdges[ncc];
1974         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1975       } else {
1976         used_IS = &ISForFaces[ncc-n_ISForEdges];
1977         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1978       }
1979       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1980       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1981       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1982       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1983       /* change of basis should not be performed on local periodic nodes */
1984       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1985       if (nnsp_has_cnst) {
1986         PetscScalar quad_value;
1987         temp_constraints++;
1988         if (!pcbddc->use_nnsp_true) {
1989           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1990         } else {
1991           quad_value = 1.0;
1992         }
1993         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1994         for (j=0;j<size_of_constraint;j++) {
1995           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1996         }
1997         /* sort by global ordering if using lapack subroutines (not needed!) */
1998         if (!skip_lapack || pcbddc->use_qr_single) {
1999           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2000           for (j=0;j<size_of_constraint;j++) {
2001             permutation[j]=j;
2002           }
2003           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2004           for (j=0;j<size_of_constraint;j++) {
2005             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2006           }
2007           for (j=0;j<size_of_constraint;j++) {
2008             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2009             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2010           }
2011           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2012           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2013         }
2014         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2015         total_counts++;
2016       }
2017       for (k=0;k<nnsp_size;k++) {
2018         PetscReal real_value;
2019         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2020         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2021         for (j=0;j<size_of_constraint;j++) {
2022           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2023         }
2024         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2025         /* check if array is null on the connected component */
2026         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2027         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2028         if (real_value > 0.0) { /* keep indices and values */
2029           /* sort by global ordering if using lapack subroutines */
2030           if (!skip_lapack || pcbddc->use_qr_single) {
2031             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2032             for (j=0;j<size_of_constraint;j++) {
2033               permutation[j]=j;
2034             }
2035             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2036             for (j=0;j<size_of_constraint;j++) {
2037               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2038               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2039             }
2040             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2041             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2042           }
2043           temp_constraints++;
2044           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2045           total_counts++;
2046         }
2047       }
2048       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2049       valid_constraints = temp_constraints;
2050       if (!pcbddc->use_nnsp_true && temp_constraints) {
2051         if (temp_constraints == 1) { /* just normalize the constraint */
2052           PetscScalar norm;
2053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2054           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2055           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2056           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2057         } else { /* perform SVD */
2058           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2059 
2060 #if defined(PETSC_MISSING_LAPACK_GESVD)
2061           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2062              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2063              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2064                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2065                 from that computed using LAPACKgesvd
2066              -> This is due to a different computation of eigenvectors in LAPACKheev
2067              -> The quality of the POD-computed basis will be the same */
2068           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2069           /* Store upper triangular part of correlation matrix */
2070           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2071           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2072           for (j=0;j<temp_constraints;j++) {
2073             for (k=0;k<j+1;k++) {
2074               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2075             }
2076           }
2077           /* compute eigenvalues and eigenvectors of correlation matrix */
2078           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2080 #if !defined(PETSC_USE_COMPLEX)
2081           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2082 #else
2083           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2084 #endif
2085           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2086           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2087           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2088           j = 0;
2089           while (j < temp_constraints && singular_vals[j] < tol) j++;
2090           total_counts = total_counts-j;
2091           valid_constraints = temp_constraints-j;
2092           /* scale and copy POD basis into used quadrature memory */
2093           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2097           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2098           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2099           if (j<temp_constraints) {
2100             PetscInt ii;
2101             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2102             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2103             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2104             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2105             for (k=0;k<temp_constraints-j;k++) {
2106               for (ii=0;ii<size_of_constraint;ii++) {
2107                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2108               }
2109             }
2110           }
2111 #else  /* on missing GESVD */
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2113           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2115           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2116 #if !defined(PETSC_USE_COMPLEX)
2117           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2118 #else
2119           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2120 #endif
2121           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2122           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2123           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2124           k = temp_constraints;
2125           if (k > size_of_constraint) k = size_of_constraint;
2126           j = 0;
2127           while (j < k && singular_vals[k-j-1] < tol) j++;
2128           valid_constraints = k-j;
2129           total_counts = total_counts-temp_constraints+valid_constraints;
2130 #endif /* on missing GESVD */
2131         }
2132       }
2133       /* setting change_of_basis flag is safe now */
2134       if (boolforchange) {
2135         for (j=0;j<valid_constraints;j++) {
2136           PetscBTSet(change_basis,total_counts-j-1);
2137         }
2138       }
2139     }
2140     /* free workspace */
2141     if (!skip_lapack || pcbddc->use_qr_single) {
2142       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2143     }
2144     if (!skip_lapack) {
2145       ierr = PetscFree(work);CHKERRQ(ierr);
2146 #if defined(PETSC_USE_COMPLEX)
2147       ierr = PetscFree(rwork);CHKERRQ(ierr);
2148 #endif
2149       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2150 #if defined(PETSC_MISSING_LAPACK_GESVD)
2151       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2152       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2153 #endif
2154     }
2155     for (k=0;k<nnsp_size;k++) {
2156       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2157     }
2158     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2159   } else {
2160     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2161     PetscInt        cum = 0;
2162 
2163     total_counts = 0;
2164     n_vertices = 0;
2165     if (sub_schurs->is_Ej_com) {
2166       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2167     }
2168     max_constraints = 0;
2169     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2170       total_counts += pcbddc->adaptive_constraints_n[i];
2171       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2172     }
2173     temp_indices = pcbddc->adaptive_constraints_ptrs;
2174     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2175     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2176 
2177 #if 0
2178     printf("Found %d totals\n",total_counts);
2179     for (i=0;i<total_counts;i++) {
2180       printf("const %d, start %d",i,temp_indices[i]);
2181       printf(" end %d:\n",temp_indices[i+1]);
2182       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2183         printf("  idxs %d",temp_indices_to_constraint[j]);
2184         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2185       }
2186     }
2187     for (i=0;i<n_vertices;i++) {
2188       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2189     }
2190     for (i=0;i<sub_schurs->n_subs;i++) {
2191       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2192     }
2193 #endif
2194 
2195     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2196     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2197     /* Change of basis */
2198     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2199     if (pcbddc->use_change_of_basis) {
2200       cum = n_vertices;
2201       for (i=0;i<sub_schurs->n_subs;i++) {
2202         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2203           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2204             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2205           }
2206         }
2207         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2208       }
2209     }
2210   }
2211 
2212   /* free index sets of faces, edges and vertices */
2213   for (i=0;i<n_ISForFaces;i++) {
2214     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2215   }
2216   if (n_ISForFaces) {
2217     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2218   }
2219   for (i=0;i<n_ISForEdges;i++) {
2220     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2221   }
2222   if (n_ISForEdges) {
2223     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2224   }
2225   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2226 
2227   /* map temp_indices_to_constraint in boundary numbering */
2228   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2229   if (i != temp_indices[total_counts]) {
2230     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2231   }
2232 
2233   /* set quantities in pcbddc data structure and store previous primal size */
2234   /* n_vertices defines the number of subdomain corners in the primal space */
2235   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2236   olocal_primal_size = pcbddc->local_primal_size;
2237   pcbddc->local_primal_size = total_counts;
2238   pcbddc->n_vertices = n_vertices;
2239   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2240 
2241   /* Create constraint matrix */
2242   /* The constraint matrix is used to compute the l2g map of primal dofs */
2243   /* so we need to set it up properly either with or without change of basis */
2244   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2245   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2246   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2247   /* array to compute a local numbering of constraints : vertices first then constraints */
2248   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2249   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2250   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2251   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2252   /* auxiliary stuff for basis change */
2253   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2254   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2255 
2256   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2257   total_primal_vertices=0;
2258   for (i=0;i<pcbddc->local_primal_size;i++) {
2259     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2260     if (size_of_constraint == 1) {
2261       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2262       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2263       aux_primal_minloc[total_primal_vertices]=0;
2264       total_primal_vertices++;
2265     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2266       PetscInt min_loc,min_index;
2267       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2268       /* find first untouched local node */
2269       k = 0;
2270       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2271       min_index = global_indices[k];
2272       min_loc = k;
2273       /* search the minimum among global nodes already untouched on the cc */
2274       for (k=1;k<size_of_constraint;k++) {
2275         /* there can be more than one constraint on a single connected component */
2276         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2277           min_index = global_indices[k];
2278           min_loc = k;
2279         }
2280       }
2281       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2282       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2283       aux_primal_minloc[total_primal_vertices]=min_loc;
2284       total_primal_vertices++;
2285     }
2286   }
2287   /* determine if a QR strategy is needed for change of basis */
2288   qr_needed = PETSC_FALSE;
2289   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2290   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2291     if (PetscBTLookup(change_basis,i)) {
2292       if (!pcbddc->use_qr_single) {
2293         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2294         j = 0;
2295         for (k=0;k<size_of_constraint;k++) {
2296           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2297             j++;
2298           }
2299         }
2300         /* found more than one primal dof on the cc */
2301         if (j > 1) {
2302           PetscBTSet(qr_needed_idx,i);
2303           qr_needed = PETSC_TRUE;
2304         }
2305       } else {
2306         PetscBTSet(qr_needed_idx,i);
2307         qr_needed = PETSC_TRUE;
2308       }
2309     }
2310   }
2311   /* free workspace */
2312   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2313 
2314   /* permute indices in order to have a sorted set of vertices */
2315   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2316 
2317   /* nonzero structure of constraint matrix */
2318   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2319   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2320   j=total_primal_vertices;
2321   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2322     if (!PetscBTLookup(change_basis,i)) {
2323       nnz[j]=temp_indices[i+1]-temp_indices[i];
2324       j++;
2325     }
2326   }
2327   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2328   ierr = PetscFree(nnz);CHKERRQ(ierr);
2329   /* set values in constraint matrix */
2330   for (i=0;i<total_primal_vertices;i++) {
2331     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2332   }
2333   total_counts = total_primal_vertices;
2334   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2335     if (!PetscBTLookup(change_basis,i)) {
2336       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2337       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2338       total_counts++;
2339     }
2340   }
2341   /* assembling */
2342   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   /*
2345   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2346   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2347   */
2348   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2349   if (pcbddc->use_change_of_basis) {
2350     /* dual and primal dofs on a single cc */
2351     PetscInt     dual_dofs,primal_dofs;
2352     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2353     PetscInt     primal_counter;
2354     /* working stuff for GEQRF */
2355     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2356     PetscBLASInt lqr_work;
2357     /* working stuff for UNGQR */
2358     PetscScalar  *gqr_work,lgqr_work_t;
2359     PetscBLASInt lgqr_work;
2360     /* working stuff for TRTRS */
2361     PetscScalar  *trs_rhs;
2362     PetscBLASInt Blas_NRHS;
2363     /* pointers for values insertion into change of basis matrix */
2364     PetscInt     *start_rows,*start_cols;
2365     PetscScalar  *start_vals;
2366     /* working stuff for values insertion */
2367     PetscBT      is_primal;
2368     /* matrix sizes */
2369     PetscInt     global_size,local_size;
2370     /* temporary change of basis */
2371     Mat          localChangeOfBasisMatrix;
2372 
2373     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2374     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2375     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2376     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2377     /* nonzeros for local mat */
2378     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2379     for (i=0;i<pcis->n;i++) nnz[i]=1;
2380     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2381       if (PetscBTLookup(change_basis,i)) {
2382         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2383         if (PetscBTLookup(qr_needed_idx,i)) {
2384           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2385         } else {
2386           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2387           /* get local primal index on the cc */
2388           j = 0;
2389           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2390           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2391         }
2392       }
2393     }
2394     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2395     ierr = PetscFree(nnz);CHKERRQ(ierr);
2396     /* Set initial identity in the matrix */
2397     for (i=0;i<pcis->n;i++) {
2398       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2399     }
2400 
2401     if (pcbddc->dbg_flag) {
2402       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2403       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2404     }
2405 
2406 
2407     /* Now we loop on the constraints which need a change of basis */
2408     /*
2409        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2410        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2411 
2412        Basic blocks of change of basis matrix T computed by
2413 
2414           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2415 
2416             | 1        0   ...        0         s_1/S |
2417             | 0        1   ...        0         s_2/S |
2418             |              ...                        |
2419             | 0        ...            1     s_{n-1}/S |
2420             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2421 
2422             with S = \sum_{i=1}^n s_i^2
2423             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2424                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2425 
2426           - QR decomposition of constraints otherwise
2427     */
2428     if (qr_needed) {
2429       /* space to store Q */
2430       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2431       /* first we issue queries for optimal work */
2432       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2433       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2434       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2435       lqr_work = -1;
2436       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2437       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2438       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2439       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2440       lgqr_work = -1;
2441       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2442       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2443       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2444       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2445       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2446       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2447       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2448       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2449       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2450       /* array to store scaling factors for reflectors */
2451       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2452       /* array to store rhs and solution of triangular solver */
2453       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2454       /* allocating workspace for check */
2455       if (pcbddc->dbg_flag) {
2456         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2457       }
2458     }
2459     /* array to store whether a node is primal or not */
2460     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2461     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2462     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2463     if (i != total_primal_vertices) {
2464       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2465     }
2466     for (i=0;i<total_primal_vertices;i++) {
2467       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2468     }
2469     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2470 
2471     /* loop on constraints and see whether or not they need a change of basis and compute it */
2472     /* -> using implicit ordering contained in temp_indices data */
2473     total_counts = pcbddc->n_vertices;
2474     primal_counter = total_counts;
2475     while (total_counts<pcbddc->local_primal_size) {
2476       primal_dofs = 1;
2477       if (PetscBTLookup(change_basis,total_counts)) {
2478         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2479         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2480           primal_dofs++;
2481         }
2482         /* get constraint info */
2483         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2484         dual_dofs = size_of_constraint-primal_dofs;
2485 
2486         if (pcbddc->dbg_flag) {
2487           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2488         }
2489 
2490         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2491 
2492           /* copy quadrature constraints for change of basis check */
2493           if (pcbddc->dbg_flag) {
2494             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2495           }
2496           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2497           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2498 
2499           /* compute QR decomposition of constraints */
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2501           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2502           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2503           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2504           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2505           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2506           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2507 
2508           /* explictly compute R^-T */
2509           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2510           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2511           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2515           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2516           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2517           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2518           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2519 
2520           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2521           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2523           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2525           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2526           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2527           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2528           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2529 
2530           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2531              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2532              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2533           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2534           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2538           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2539           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2540           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2541           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2542           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2543 
2544           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2545           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2546           /* insert cols for primal dofs */
2547           for (j=0;j<primal_dofs;j++) {
2548             start_vals = &qr_basis[j*size_of_constraint];
2549             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2550             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2551           }
2552           /* insert cols for dual dofs */
2553           for (j=0,k=0;j<dual_dofs;k++) {
2554             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2555               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2556               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2557               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2558               j++;
2559             }
2560           }
2561 
2562           /* check change of basis */
2563           if (pcbddc->dbg_flag) {
2564             PetscInt   ii,jj;
2565             PetscBool valid_qr=PETSC_TRUE;
2566             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2567             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2568             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2569             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2570             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2571             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2572             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2573             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2574             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2575             for (jj=0;jj<size_of_constraint;jj++) {
2576               for (ii=0;ii<primal_dofs;ii++) {
2577                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2578                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2579               }
2580             }
2581             if (!valid_qr) {
2582               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2583               for (jj=0;jj<size_of_constraint;jj++) {
2584                 for (ii=0;ii<primal_dofs;ii++) {
2585                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2586                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2587                   }
2588                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2589                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2590                   }
2591                 }
2592               }
2593             } else {
2594               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2595             }
2596           }
2597         } else { /* simple transformation block */
2598           PetscInt    row,col;
2599           PetscScalar val,norm;
2600 
2601           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2602           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2603           for (j=0;j<size_of_constraint;j++) {
2604             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2605             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2606             if (!PetscBTLookup(is_primal,row_B)) {
2607               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2608               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2609               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2610             } else {
2611               for (k=0;k<size_of_constraint;k++) {
2612                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2613                 if (row != col) {
2614                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2615                 } else {
2616                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2617                 }
2618                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2619               }
2620             }
2621           }
2622           if (pcbddc->dbg_flag) {
2623             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2624           }
2625         }
2626         /* increment primal counter */
2627         primal_counter += primal_dofs;
2628       } else {
2629         if (pcbddc->dbg_flag) {
2630           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2631         }
2632       }
2633       /* increment constraint counter total_counts */
2634       total_counts += primal_dofs;
2635     }
2636 
2637     /* free workspace */
2638     if (qr_needed) {
2639       if (pcbddc->dbg_flag) {
2640         ierr = PetscFree(work);CHKERRQ(ierr);
2641       }
2642       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2643       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2644       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2645       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2646       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2647     }
2648     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2649     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651 
2652     /* assembling of global change of variable */
2653     {
2654       Mat      tmat;
2655       PetscInt bs;
2656 
2657       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2658       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2659       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2660       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2661       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2662       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2663       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2664       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2665       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2666       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2667       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2668       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2669       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2670       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2671       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2672       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2673       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2674       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2675     }
2676     /* check */
2677     if (pcbddc->dbg_flag) {
2678       PetscReal error;
2679       Vec       x,x_change;
2680 
2681       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2682       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2683       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2684       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2685       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2688       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2689       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2690       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2691       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2692       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2693       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2694       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2695       ierr = VecDestroy(&x);CHKERRQ(ierr);
2696       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2697     }
2698 
2699     /* adapt sub_schurs computed (if any) */
2700     if (pcbddc->use_deluxe_scaling) {
2701       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2702       if (sub_schurs->n_subs_par_g) {
2703         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2704       }
2705       if (sub_schurs->S_Ej_all) {
2706         Mat S_1,S_2,tmat;
2707         IS is_all_N;
2708 
2709         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2710         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2711         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2712         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2713         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2714         sub_schurs->S_Ej_all = S_1;
2715         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2716         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2717         sub_schurs->sum_S_Ej_all = S_2;
2718         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2719       }
2720     }
2721     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2722   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2723     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2724     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2725   }
2726 
2727   /* set up change of basis context */
2728   if (pcbddc->ChangeOfBasisMatrix) {
2729     PCBDDCChange_ctx change_ctx;
2730 
2731     if (!pcbddc->new_global_mat) {
2732       PetscInt global_size,local_size;
2733 
2734       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2735       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2736       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2737       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2738       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2739       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2740       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2741       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2742       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2743     } else {
2744       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2745       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2746       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2747     }
2748     if (!pcbddc->user_ChangeOfBasisMatrix) {
2749       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2750       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2751     } else {
2752       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2753       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2754     }
2755     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2756     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2757     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   }
2760 
2761   /* get indices in local ordering for vertices and constraints */
2762   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2763     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2764     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2765   }
2766   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2767   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2768   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2769   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2770   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2771   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2772   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2773   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2774   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2775   /* set quantities in PCBDDC data struct */
2776   pcbddc->n_actual_vertices = i;
2777   /* check if a new primal space has been introduced */
2778   pcbddc->new_primal_space_local = PETSC_TRUE;
2779   if (olocal_primal_size == pcbddc->local_primal_size) {
2780     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2781     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2782     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2783   }
2784   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2785   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2786 
2787   /* flush dbg viewer */
2788   if (pcbddc->dbg_flag) {
2789     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2790   }
2791 
2792   /* free workspace */
2793   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2794   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2795   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2796   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2797   if (!pcbddc->adaptive_selection) {
2798     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2799     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2800   } else {
2801     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2802                       pcbddc->adaptive_constraints_ptrs,
2803                       pcbddc->adaptive_constraints_idxs,
2804                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2805     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2806   }
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2812 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2813 {
2814   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2815   PC_IS       *pcis = (PC_IS*)pc->data;
2816   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2817   PetscInt    ierr,i,vertex_size;
2818   PetscViewer viewer=pcbddc->dbg_viewer;
2819 
2820   PetscFunctionBegin;
2821   /* Reset previously computed graph */
2822   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2823   /* Init local Graph struct */
2824   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2825 
2826   /* Check validity of the csr graph passed in by the user */
2827   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2828     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2829   }
2830 
2831   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2832   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2833     Mat       mat_adj;
2834     PetscInt  *xadj,*adjncy;
2835     PetscInt  nvtxs;
2836     PetscBool flg_row=PETSC_TRUE;
2837 
2838     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2839     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2840     if (!flg_row) {
2841       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2842     }
2843     if (pcbddc->use_local_adj) {
2844       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2845       pcbddc->computed_rowadj = PETSC_TRUE;
2846     } else { /* just compute subdomain's connected components */
2847       IS                     is_dummy;
2848       ISLocalToGlobalMapping l2gmap_dummy;
2849       PetscInt               j,sum;
2850       PetscInt               *cxadj,*cadjncy;
2851       const PetscInt         *idxs;
2852       PCBDDCGraph            graph;
2853       PetscBT                is_on_boundary;
2854 
2855       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2856       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2857       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2858       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2859       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2860       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2861       graph->xadj = xadj;
2862       graph->adjncy = adjncy;
2863       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2864       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2865 
2866       if (pcbddc->dbg_flag) {
2867         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2868         for (i=0;i<graph->ncc;i++) {
2869           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2870         }
2871         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2872       }
2873 
2874       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2875       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2876       for (i=0;i<pcis->n_B;i++) {
2877         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2878       }
2879       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2880 
2881       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2882       sum = 0;
2883       for (i=0;i<graph->ncc;i++) {
2884         PetscInt sizecc = 0;
2885         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2886           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2887             sizecc++;
2888           }
2889         }
2890         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2891           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2892             cxadj[graph->queue[j]] = sizecc;
2893           }
2894         }
2895         sum += sizecc*sizecc;
2896       }
2897       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2898       sum = 0;
2899       for (i=0;i<nvtxs;i++) {
2900         PetscInt temp = cxadj[i];
2901         cxadj[i] = sum;
2902         sum += temp;
2903       }
2904       cxadj[nvtxs] = sum;
2905       for (i=0;i<graph->ncc;i++) {
2906         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2907           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2908             PetscInt k,sizecc = 0;
2909             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2910               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2911                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2912                 sizecc++;
2913               }
2914             }
2915           }
2916         }
2917       }
2918       if (nvtxs) {
2919         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2920       } else {
2921         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2922         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2923       }
2924       graph->xadj = 0;
2925       graph->adjncy = 0;
2926       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2927       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2928     }
2929     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2930     if (!flg_row) {
2931       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2932     }
2933     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2934   }
2935 
2936   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2937   vertex_size = 1;
2938   if (pcbddc->user_provided_isfordofs) {
2939     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2940       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2941       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2942         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2943         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2944       }
2945       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2946       pcbddc->n_ISForDofs = 0;
2947       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2948     }
2949     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2950     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2951   } else {
2952     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2953       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2954       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2955       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2956         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2957       }
2958     }
2959   }
2960 
2961   /* Setup of Graph */
2962   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2963     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2964   }
2965   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2966     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2967   }
2968   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2969 
2970   /* Graph's connected components analysis */
2971   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2972 
2973   /* print some info to stdout */
2974   if (pcbddc->dbg_flag) {
2975     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2976   }
2977 
2978   /* mark topography has done */
2979   pcbddc->recompute_topography = PETSC_FALSE;
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2985 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2986 {
2987   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2988   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2989   PetscErrorCode ierr;
2990 
2991   PetscFunctionBegin;
2992   n = 0;
2993   vertices = 0;
2994   if (pcbddc->ConstraintMatrix) {
2995     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2996     for (i=0;i<local_primal_size;i++) {
2997       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2998       if (size_of_constraint == 1) n++;
2999       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3000     }
3001     if (vertices_idx) {
3002       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3003       n = 0;
3004       for (i=0;i<local_primal_size;i++) {
3005         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3006         if (size_of_constraint == 1) {
3007           vertices[n++]=row_cmat_indices[0];
3008         }
3009         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3010       }
3011     }
3012   }
3013   *n_vertices = n;
3014   if (vertices_idx) *vertices_idx = vertices;
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3020 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3021 {
3022   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3023   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3024   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3025   PetscBT        touched;
3026   PetscErrorCode ierr;
3027 
3028     /* This function assumes that the number of local constraints per connected component
3029        is not greater than the number of nodes defined for the connected component
3030        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3031   PetscFunctionBegin;
3032   n = 0;
3033   constraints_index = 0;
3034   if (pcbddc->ConstraintMatrix) {
3035     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3036     max_size_of_constraint = 0;
3037     for (i=0;i<local_primal_size;i++) {
3038       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3039       if (size_of_constraint > 1) {
3040         n++;
3041       }
3042       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3043       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3044     }
3045     if (constraints_idx) {
3046       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3047       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3048       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3049       n = 0;
3050       for (i=0;i<local_primal_size;i++) {
3051         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3052         if (size_of_constraint > 1) {
3053           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3054           /* find first untouched local node */
3055           j = 0;
3056           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3057           min_index = row_cmat_global_indices[j];
3058           min_loc = j;
3059           /* search the minimum among nodes not yet touched on the connected component
3060              since there can be more than one constraint on a single cc */
3061           for (j=1;j<size_of_constraint;j++) {
3062             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3063               min_index = row_cmat_global_indices[j];
3064               min_loc = j;
3065             }
3066           }
3067           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3068           constraints_index[n++] = row_cmat_indices[min_loc];
3069         }
3070         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3071       }
3072       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3073       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3074     }
3075   }
3076   *n_constraints = n;
3077   if (constraints_idx) *constraints_idx = constraints_index;
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 #undef __FUNCT__
3082 #define __FUNCT__ "PCBDDCSubsetNumbering"
3083 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3084 {
3085   Vec            local_vec,global_vec;
3086   IS             seqis,paris;
3087   VecScatter     scatter_ctx;
3088   PetscScalar    *array;
3089   PetscInt       *temp_global_dofs;
3090   PetscScalar    globalsum;
3091   PetscInt       i,j,s;
3092   PetscInt       nlocals,first_index,old_index,max_local;
3093   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3094   PetscMPIInt    *dof_sizes,*dof_displs;
3095   PetscBool      first_found;
3096   PetscErrorCode ierr;
3097 
3098   PetscFunctionBegin;
3099   /* mpi buffers */
3100   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3101   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3102   j = ( !rank_prec_comm ? size_prec_comm : 0);
3103   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3104   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3105   /* get maximum size of subset */
3106   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3107   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3108   max_local = 0;
3109   for (i=0;i<n_local_dofs;i++) {
3110     if (max_local < temp_global_dofs[i] ) {
3111       max_local = temp_global_dofs[i];
3112     }
3113   }
3114   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3115   max_global++;
3116   max_local = 0;
3117   for (i=0;i<n_local_dofs;i++) {
3118     if (max_local < local_dofs[i] ) {
3119       max_local = local_dofs[i];
3120     }
3121   }
3122   max_local++;
3123   /* allocate workspace */
3124   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3125   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3126   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3127   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3128   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3129   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3130   /* create scatter */
3131   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3132   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3133   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3134   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3135   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3136   /* init array */
3137   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3138   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3139   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3140   if (local_dofs_mult) {
3141     for (i=0;i<n_local_dofs;i++) {
3142       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3143     }
3144   } else {
3145     for (i=0;i<n_local_dofs;i++) {
3146       array[local_dofs[i]]=1.0;
3147     }
3148   }
3149   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3150   /* scatter into global vec and get total number of global dofs */
3151   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3152   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3153   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3154   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3155   /* Fill global_vec with cumulative function for global numbering */
3156   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3157   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3158   nlocals = 0;
3159   first_index = -1;
3160   first_found = PETSC_FALSE;
3161   for (i=0;i<s;i++) {
3162     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3163       first_found = PETSC_TRUE;
3164       first_index = i;
3165     }
3166     nlocals += (PetscInt)PetscRealPart(array[i]);
3167   }
3168   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3169   if (!rank_prec_comm) {
3170     dof_displs[0]=0;
3171     for (i=1;i<size_prec_comm;i++) {
3172       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3173     }
3174   }
3175   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3176   if (first_found) {
3177     array[first_index] += (PetscScalar)nlocals;
3178     old_index = first_index;
3179     for (i=first_index+1;i<s;i++) {
3180       if (PetscRealPart(array[i]) > 0.1) {
3181         array[i] += array[old_index];
3182         old_index = i;
3183       }
3184     }
3185   }
3186   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3187   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3188   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3189   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3190   /* get global ordering of local dofs */
3191   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3192   if (local_dofs_mult) {
3193     for (i=0;i<n_local_dofs;i++) {
3194       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3195     }
3196   } else {
3197     for (i=0;i<n_local_dofs;i++) {
3198       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3199     }
3200   }
3201   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3202   /* free workspace */
3203   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3204   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3205   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3206   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3207   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3208   /* return pointer to global ordering of local dofs */
3209   *global_numbering_subset = temp_global_dofs;
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 #undef __FUNCT__
3214 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3215 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3216 {
3217   PetscInt       i,j;
3218   PetscScalar    *alphas;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   /* this implements stabilized Gram-Schmidt */
3223   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3224   for (i=0;i<n;i++) {
3225     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3226     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3227     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3228   }
3229   ierr = PetscFree(alphas);CHKERRQ(ierr);
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatISGetSubassemblingPattern"
3235 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3236 {
3237   Mat             subdomain_adj;
3238   IS              new_ranks,ranks_send_to;
3239   MatPartitioning partitioner;
3240   Mat_IS          *matis;
3241   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3242   PetscInt        prank;
3243   PetscMPIInt     size,rank,color;
3244   PetscInt        *xadj,*adjncy,*oldranks;
3245   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3246   PetscInt        i,local_size,threshold=0;
3247   PetscErrorCode  ierr;
3248   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3249   PetscSubcomm    subcomm;
3250 
3251   PetscFunctionBegin;
3252   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3253   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3254   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3255 
3256   /* Get info on mapping */
3257   matis = (Mat_IS*)(mat->data);
3258   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3259   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3260 
3261   /* build local CSR graph of subdomains' connectivity */
3262   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3263   xadj[0] = 0;
3264   xadj[1] = PetscMax(n_neighs-1,0);
3265   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3266   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3267 
3268   if (threshold) {
3269     PetscInt xadj_count = 0;
3270     for (i=1;i<n_neighs;i++) {
3271       if (n_shared[i] > threshold) {
3272         adjncy[xadj_count] = neighs[i];
3273         adjncy_wgt[xadj_count] = n_shared[i];
3274         xadj_count++;
3275       }
3276     }
3277     xadj[1] = xadj_count;
3278   } else {
3279     if (xadj[1]) {
3280       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3281       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3282     }
3283   }
3284   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3285   if (use_square) {
3286     for (i=0;i<xadj[1];i++) {
3287       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3288     }
3289   }
3290   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3291 
3292   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3293 
3294   /*
3295     Restrict work on active processes only.
3296   */
3297   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3298   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3299   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3300   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3301   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3302   if (color) {
3303     ierr = PetscFree(xadj);CHKERRQ(ierr);
3304     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3305     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3306   } else {
3307     PetscInt coarsening_ratio;
3308     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3309     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3310     prank = rank;
3311     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3312     /*
3313     for (i=0;i<size;i++) {
3314       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3315     }
3316     */
3317     for (i=0;i<xadj[1];i++) {
3318       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3319     }
3320     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3321     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3322     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3323 
3324     /* Partition */
3325     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3326     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3327     if (use_vwgt) {
3328       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3329       v_wgt[0] = local_size;
3330       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3331     }
3332     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3333     coarsening_ratio = size/n_subdomains;
3334     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3335     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3336     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3337     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3338 
3339     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3340     if (contiguous) {
3341       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3342     } else {
3343       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3344     }
3345     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3346     /* clean up */
3347     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3348     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3349     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3350     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3351   }
3352   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3353 
3354   /* assemble parallel IS for sends */
3355   i = 1;
3356   if (color) i=0;
3357   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3358 
3359   /* get back IS */
3360   *is_sends = ranks_send_to;
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatISSubassemble"
3368 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3369 {
3370   Mat                    local_mat;
3371   Mat_IS                 *matis;
3372   IS                     is_sends_internal;
3373   PetscInt               rows,cols,new_local_rows;
3374   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3375   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3376   ISLocalToGlobalMapping l2gmap;
3377   PetscInt*              l2gmap_indices;
3378   const PetscInt*        is_indices;
3379   MatType                new_local_type;
3380   /* buffers */
3381   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3382   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3383   PetscInt               *recv_buffer_idxs_local;
3384   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3385   /* MPI */
3386   MPI_Comm               comm,comm_n;
3387   PetscSubcomm           subcomm;
3388   PetscMPIInt            n_sends,n_recvs,commsize;
3389   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3390   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3391   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3392   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3393   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3394   PetscErrorCode         ierr;
3395 
3396   PetscFunctionBegin;
3397   /* TODO: add missing checks */
3398   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3399   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3400   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3401   PetscValidLogicalCollectiveInt(mat,nis,7);
3402   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3403   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3404   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3405   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3406   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3407   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3408   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3409   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3410     PetscInt mrows,mcols,mnrows,mncols;
3411     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3412     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3413     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3414     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3415     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3416     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3417   }
3418   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3419   PetscValidLogicalCollectiveInt(mat,bs,0);
3420   /* prepare IS for sending if not provided */
3421   if (!is_sends) {
3422     PetscBool pcontig = PETSC_TRUE;
3423     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3424     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3425   } else {
3426     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3427     is_sends_internal = is_sends;
3428   }
3429 
3430   /* get pointer of MATIS data */
3431   matis = (Mat_IS*)mat->data;
3432 
3433   /* get comm */
3434   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3435 
3436   /* compute number of sends */
3437   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3438   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3439 
3440   /* compute number of receives */
3441   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3442   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3443   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3444   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3445   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3446   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3447   ierr = PetscFree(iflags);CHKERRQ(ierr);
3448 
3449   /* restrict comm if requested */
3450   subcomm = 0;
3451   destroy_mat = PETSC_FALSE;
3452   if (restrict_comm) {
3453     PetscMPIInt color,subcommsize;
3454 
3455     color = 0;
3456     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3457     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3458     subcommsize = commsize - subcommsize;
3459     /* check if reuse has been requested */
3460     if (reuse == MAT_REUSE_MATRIX) {
3461       if (*mat_n) {
3462         PetscMPIInt subcommsize2;
3463         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3464         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3465         comm_n = PetscObjectComm((PetscObject)*mat_n);
3466       } else {
3467         comm_n = PETSC_COMM_SELF;
3468       }
3469     } else { /* MAT_INITIAL_MATRIX */
3470       PetscMPIInt rank;
3471 
3472       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3473       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3474       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3475       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3476       comm_n = subcomm->comm;
3477     }
3478     /* flag to destroy *mat_n if not significative */
3479     if (color) destroy_mat = PETSC_TRUE;
3480   } else {
3481     comm_n = comm;
3482   }
3483 
3484   /* prepare send/receive buffers */
3485   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3486   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3487   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3488   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3489   if (nis) {
3490     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3491   }
3492 
3493   /* Get data from local matrices */
3494   if (!isdense) {
3495     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3496     /* TODO: See below some guidelines on how to prepare the local buffers */
3497     /*
3498        send_buffer_vals should contain the raw values of the local matrix
3499        send_buffer_idxs should contain:
3500        - MatType_PRIVATE type
3501        - PetscInt        size_of_l2gmap
3502        - PetscInt        global_row_indices[size_of_l2gmap]
3503        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3504     */
3505   } else {
3506     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3507     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3508     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3509     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3510     send_buffer_idxs[1] = i;
3511     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3512     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3513     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3514     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3515     for (i=0;i<n_sends;i++) {
3516       ilengths_vals[is_indices[i]] = len*len;
3517       ilengths_idxs[is_indices[i]] = len+2;
3518     }
3519   }
3520   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3521   /* additional is (if any) */
3522   if (nis) {
3523     PetscMPIInt psum;
3524     PetscInt j;
3525     for (j=0,psum=0;j<nis;j++) {
3526       PetscInt plen;
3527       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3528       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3529       psum += len+1; /* indices + lenght */
3530     }
3531     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3532     for (j=0,psum=0;j<nis;j++) {
3533       PetscInt plen;
3534       const PetscInt *is_array_idxs;
3535       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3536       send_buffer_idxs_is[psum] = plen;
3537       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3538       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3539       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3540       psum += plen+1; /* indices + lenght */
3541     }
3542     for (i=0;i<n_sends;i++) {
3543       ilengths_idxs_is[is_indices[i]] = psum;
3544     }
3545     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3546   }
3547 
3548   buf_size_idxs = 0;
3549   buf_size_vals = 0;
3550   buf_size_idxs_is = 0;
3551   for (i=0;i<n_recvs;i++) {
3552     buf_size_idxs += (PetscInt)olengths_idxs[i];
3553     buf_size_vals += (PetscInt)olengths_vals[i];
3554     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3555   }
3556   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3557   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3558   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3559 
3560   /* get new tags for clean communications */
3561   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3562   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3563   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3564 
3565   /* allocate for requests */
3566   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3567   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3568   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3569   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3570   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3571   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3572 
3573   /* communications */
3574   ptr_idxs = recv_buffer_idxs;
3575   ptr_vals = recv_buffer_vals;
3576   ptr_idxs_is = recv_buffer_idxs_is;
3577   for (i=0;i<n_recvs;i++) {
3578     source_dest = onodes[i];
3579     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3580     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3581     ptr_idxs += olengths_idxs[i];
3582     ptr_vals += olengths_vals[i];
3583     if (nis) {
3584       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3585       ptr_idxs_is += olengths_idxs_is[i];
3586     }
3587   }
3588   for (i=0;i<n_sends;i++) {
3589     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3590     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3591     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3592     if (nis) {
3593       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3594     }
3595   }
3596   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3597   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3598 
3599   /* assemble new l2g map */
3600   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3601   ptr_idxs = recv_buffer_idxs;
3602   new_local_rows = 0;
3603   for (i=0;i<n_recvs;i++) {
3604     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3605     ptr_idxs += olengths_idxs[i];
3606   }
3607   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3608   ptr_idxs = recv_buffer_idxs;
3609   new_local_rows = 0;
3610   for (i=0;i<n_recvs;i++) {
3611     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3612     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3613     ptr_idxs += olengths_idxs[i];
3614   }
3615   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3616   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3617   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3618 
3619   /* infer new local matrix type from received local matrices type */
3620   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3621   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3622   if (n_recvs) {
3623     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3624     ptr_idxs = recv_buffer_idxs;
3625     for (i=0;i<n_recvs;i++) {
3626       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3627         new_local_type_private = MATAIJ_PRIVATE;
3628         break;
3629       }
3630       ptr_idxs += olengths_idxs[i];
3631     }
3632     switch (new_local_type_private) {
3633       case MATDENSE_PRIVATE:
3634         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3635           new_local_type = MATSEQAIJ;
3636           bs = 1;
3637         } else { /* if I receive only 1 dense matrix */
3638           new_local_type = MATSEQDENSE;
3639           bs = 1;
3640         }
3641         break;
3642       case MATAIJ_PRIVATE:
3643         new_local_type = MATSEQAIJ;
3644         bs = 1;
3645         break;
3646       case MATBAIJ_PRIVATE:
3647         new_local_type = MATSEQBAIJ;
3648         break;
3649       case MATSBAIJ_PRIVATE:
3650         new_local_type = MATSEQSBAIJ;
3651         break;
3652       default:
3653         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3654         break;
3655     }
3656   } else { /* by default, new_local_type is seqdense */
3657     new_local_type = MATSEQDENSE;
3658     bs = 1;
3659   }
3660 
3661   /* create MATIS object if needed */
3662   if (reuse == MAT_INITIAL_MATRIX) {
3663     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3664     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3665   } else {
3666     /* it also destroys the local matrices */
3667     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3668   }
3669   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3670   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3671 
3672   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3673 
3674   /* Global to local map of received indices */
3675   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3676   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3677   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3678 
3679   /* restore attributes -> type of incoming data and its size */
3680   buf_size_idxs = 0;
3681   for (i=0;i<n_recvs;i++) {
3682     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3683     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3684     buf_size_idxs += (PetscInt)olengths_idxs[i];
3685   }
3686   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3687 
3688   /* set preallocation */
3689   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3690   if (!newisdense) {
3691     PetscInt *new_local_nnz=0;
3692 
3693     ptr_vals = recv_buffer_vals;
3694     ptr_idxs = recv_buffer_idxs_local;
3695     if (n_recvs) {
3696       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3697     }
3698     for (i=0;i<n_recvs;i++) {
3699       PetscInt j;
3700       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3701         for (j=0;j<*(ptr_idxs+1);j++) {
3702           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3703         }
3704       } else {
3705         /* TODO */
3706       }
3707       ptr_idxs += olengths_idxs[i];
3708     }
3709     if (new_local_nnz) {
3710       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3711       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3712       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3713       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3714       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3715       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3716     } else {
3717       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3718     }
3719     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3720   } else {
3721     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3722   }
3723 
3724   /* set values */
3725   ptr_vals = recv_buffer_vals;
3726   ptr_idxs = recv_buffer_idxs_local;
3727   for (i=0;i<n_recvs;i++) {
3728     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3729       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3730       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3731       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3732       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3733       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3734     } else {
3735       /* TODO */
3736     }
3737     ptr_idxs += olengths_idxs[i];
3738     ptr_vals += olengths_vals[i];
3739   }
3740   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3742   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3745   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3746 
3747 #if 0
3748   if (!restrict_comm) { /* check */
3749     Vec       lvec,rvec;
3750     PetscReal infty_error;
3751 
3752     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3753     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3754     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3755     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3756     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3757     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3758     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3759     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3760     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3761   }
3762 #endif
3763 
3764   /* assemble new additional is (if any) */
3765   if (nis) {
3766     PetscInt **temp_idxs,*count_is,j,psum;
3767 
3768     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3769     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3770     ptr_idxs = recv_buffer_idxs_is;
3771     psum = 0;
3772     for (i=0;i<n_recvs;i++) {
3773       for (j=0;j<nis;j++) {
3774         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3775         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3776         psum += plen;
3777         ptr_idxs += plen+1; /* shift pointer to received data */
3778       }
3779     }
3780     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3781     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3782     for (i=1;i<nis;i++) {
3783       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3784     }
3785     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3786     ptr_idxs = recv_buffer_idxs_is;
3787     for (i=0;i<n_recvs;i++) {
3788       for (j=0;j<nis;j++) {
3789         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3790         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3791         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3792         ptr_idxs += plen+1; /* shift pointer to received data */
3793       }
3794     }
3795     for (i=0;i<nis;i++) {
3796       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3797       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3798       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3799     }
3800     ierr = PetscFree(count_is);CHKERRQ(ierr);
3801     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3802     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3803   }
3804   /* free workspace */
3805   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3806   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3807   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3808   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3809   if (isdense) {
3810     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3811     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3812   } else {
3813     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3814   }
3815   if (nis) {
3816     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3817     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3818   }
3819   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3820   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3821   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3822   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3823   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3824   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3825   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3826   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3827   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3828   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3829   ierr = PetscFree(onodes);CHKERRQ(ierr);
3830   if (nis) {
3831     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3832     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3833     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3834   }
3835   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3836   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3837     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3838     for (i=0;i<nis;i++) {
3839       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3840     }
3841   }
3842   PetscFunctionReturn(0);
3843 }
3844 
3845 /* temporary hack into ksp private data structure */
3846 #include <petsc-private/kspimpl.h>
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3850 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3851 {
3852   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3853   PC_IS                  *pcis = (PC_IS*)pc->data;
3854   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3855   MatNullSpace           CoarseNullSpace=NULL;
3856   ISLocalToGlobalMapping coarse_islg;
3857   IS                     coarse_is,*isarray;
3858   PetscInt               i,im_active=-1,active_procs=-1;
3859   PetscInt               nis,nisdofs,nisneu;
3860   PC                     pc_temp;
3861   PCType                 coarse_pc_type;
3862   KSPType                coarse_ksp_type;
3863   PetscBool              multilevel_requested,multilevel_allowed;
3864   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3865   Mat                    t_coarse_mat_is;
3866   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3867   PetscMPIInt            all_procs;
3868   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3869   PetscBool              compute_vecs = PETSC_FALSE;
3870   PetscScalar            *array;
3871   PetscErrorCode         ierr;
3872 
3873   PetscFunctionBegin;
3874   /* Assign global numbering to coarse dofs */
3875   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3876     compute_vecs = PETSC_TRUE;
3877     PetscInt ocoarse_size;
3878     ocoarse_size = pcbddc->coarse_size;
3879     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3880     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3881     /* see if we can avoid some work */
3882     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3883       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3884         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3885         coarse_reuse = PETSC_FALSE;
3886       } else { /* we can safely reuse already computed coarse matrix */
3887         coarse_reuse = PETSC_TRUE;
3888       }
3889     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3890       coarse_reuse = PETSC_FALSE;
3891     }
3892     /* reset any subassembling information */
3893     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3894     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3895   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3896     coarse_reuse = PETSC_TRUE;
3897   }
3898 
3899   /* count "active" (i.e. with positive local size) and "void" processes */
3900   im_active = !!(pcis->n);
3901   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3902   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3903   void_procs = all_procs-active_procs;
3904   csin_type_simple = PETSC_TRUE;
3905   redist = PETSC_FALSE;
3906   if (pcbddc->current_level && void_procs) {
3907     csin_ml = PETSC_TRUE;
3908     ncoarse_ml = void_procs;
3909     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3910     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3911       csin_ds = PETSC_TRUE;
3912       ncoarse_ds = pcbddc->redistribute_coarse;
3913       redist = PETSC_TRUE;
3914     } else {
3915       csin_ds = PETSC_TRUE;
3916       ncoarse_ds = active_procs;
3917       redist = PETSC_TRUE;
3918     }
3919   } else {
3920     csin_ml = PETSC_FALSE;
3921     ncoarse_ml = all_procs;
3922     if (void_procs) {
3923       csin_ds = PETSC_TRUE;
3924       ncoarse_ds = void_procs;
3925       csin_type_simple = PETSC_FALSE;
3926     } else {
3927       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3928         csin_ds = PETSC_TRUE;
3929         ncoarse_ds = pcbddc->redistribute_coarse;
3930         redist = PETSC_TRUE;
3931       } else {
3932         csin_ds = PETSC_FALSE;
3933         ncoarse_ds = all_procs;
3934       }
3935     }
3936   }
3937 
3938   /*
3939     test if we can go multilevel: three conditions must be satisfied:
3940     - we have not exceeded the number of levels requested
3941     - we can actually subassemble the active processes
3942     - we can find a suitable number of MPI processes where we can place the subassembled problem
3943   */
3944   multilevel_allowed = PETSC_FALSE;
3945   multilevel_requested = PETSC_FALSE;
3946   if (pcbddc->current_level < pcbddc->max_levels) {
3947     multilevel_requested = PETSC_TRUE;
3948     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3949       multilevel_allowed = PETSC_FALSE;
3950     } else {
3951       multilevel_allowed = PETSC_TRUE;
3952     }
3953   }
3954   /* determine number of process partecipating to coarse solver */
3955   if (multilevel_allowed) {
3956     ncoarse = ncoarse_ml;
3957     csin = csin_ml;
3958   } else {
3959     ncoarse = ncoarse_ds;
3960     csin = csin_ds;
3961   }
3962 
3963   /* creates temporary l2gmap and IS for coarse indexes */
3964   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3965   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3966 
3967   /* creates temporary MATIS object for coarse matrix */
3968   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3969   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3970   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3971   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3972 #if 0
3973   {
3974     PetscViewer viewer;
3975     char filename[256];
3976     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3977     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3978     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3979     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3980     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3981   }
3982 #endif
3983   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3984   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3985   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3986   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3987   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3988 
3989   /* compute dofs splitting and neumann boundaries for coarse dofs */
3990   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3991     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3992     const PetscInt         *idxs;
3993     ISLocalToGlobalMapping tmap;
3994 
3995     /* create map between primal indices (in local representative ordering) and local primal numbering */
3996     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3997     /* allocate space for temporary storage */
3998     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3999     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4000     /* allocate for IS array */
4001     nisdofs = pcbddc->n_ISForDofsLocal;
4002     nisneu = !!pcbddc->NeumannBoundariesLocal;
4003     nis = nisdofs + nisneu;
4004     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4005     /* dofs splitting */
4006     for (i=0;i<nisdofs;i++) {
4007       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4008       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4009       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4010       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4011       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4012       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4013       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4014       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4015     }
4016     /* neumann boundaries */
4017     if (pcbddc->NeumannBoundariesLocal) {
4018       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4019       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4020       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4021       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4022       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4023       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4024       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4025       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4026     }
4027     /* free memory */
4028     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4029     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4030     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4031   } else {
4032     nis = 0;
4033     nisdofs = 0;
4034     nisneu = 0;
4035     isarray = NULL;
4036   }
4037   /* destroy no longer needed map */
4038   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4039 
4040   /* restrict on coarse candidates (if needed) */
4041   coarse_mat_is = NULL;
4042   if (csin) {
4043     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4044       if (redist) {
4045         PetscMPIInt rank;
4046         PetscInt    spc,n_spc_p1,dest[1],destsize;
4047 
4048         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4049         spc = active_procs/pcbddc->redistribute_coarse;
4050         n_spc_p1 = active_procs%pcbddc->redistribute_coarse;
4051         if (im_active) {
4052           destsize = 1;
4053           if (rank > n_spc_p1*(spc+1)-1) {
4054             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4055           } else {
4056             dest[0] = rank/(spc+1);
4057           }
4058         } else {
4059           destsize = 0;
4060         }
4061         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4062       } else if (csin_type_simple) {
4063         PetscMPIInt rank;
4064         PetscInt    issize,isidx;
4065 
4066         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4067         if (im_active) {
4068           issize = 1;
4069           isidx = (PetscInt)rank;
4070         } else {
4071           issize = 0;
4072           isidx = -1;
4073         }
4074         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4075       } else { /* get a suitable subassembling pattern from MATIS code */
4076         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4077       }
4078 
4079       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4080       if (!redist || ncoarse <= void_procs) {
4081         PetscInt ncoarse_cand,tissize,*nisindices;
4082         PetscInt *coarse_candidates;
4083         const PetscInt* tisindices;
4084 
4085         /* get coarse candidates' ranks in pc communicator */
4086         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4087         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4088         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4089           if (!coarse_candidates[i]) {
4090             coarse_candidates[ncoarse_cand++]=i;
4091           }
4092         }
4093         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4094 
4095 
4096         if (pcbddc->dbg_flag) {
4097           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4098           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4099           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4100           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4101           for (i=0;i<ncoarse_cand;i++) {
4102             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4103           }
4104           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4105           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4106         }
4107         /* shift the pattern on coarse candidates */
4108         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4109         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4110         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4111         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4112         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4113         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4114         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4115       }
4116       if (pcbddc->dbg_flag) {
4117         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4118         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4119         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4120         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4121       }
4122     }
4123     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4124     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4125   } else {
4126     if (pcbddc->dbg_flag) {
4127       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4128       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4129       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4130     }
4131     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4132     coarse_mat_is = t_coarse_mat_is;
4133   }
4134 
4135   /* create local to global scatters for coarse problem */
4136   if (compute_vecs) {
4137     PetscInt lrows;
4138     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4139     if (coarse_mat_is) {
4140       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4141     } else {
4142       lrows = 0;
4143     }
4144     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4145     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4146     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4147     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4148     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4149   }
4150   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4151   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4152 
4153   /* set defaults for coarse KSP and PC */
4154   if (multilevel_allowed) {
4155     coarse_ksp_type = KSPRICHARDSON;
4156     coarse_pc_type = PCBDDC;
4157   } else {
4158     coarse_ksp_type = KSPPREONLY;
4159     coarse_pc_type = PCREDUNDANT;
4160   }
4161 
4162   /* print some info if requested */
4163   if (pcbddc->dbg_flag) {
4164     if (!multilevel_allowed) {
4165       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4166       if (multilevel_requested) {
4167         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4168       } else if (pcbddc->max_levels) {
4169         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4170       }
4171       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4172     }
4173   }
4174 
4175   /* create the coarse KSP object only once with defaults */
4176   if (coarse_mat_is) {
4177     MatReuse coarse_mat_reuse;
4178     PetscViewer dbg_viewer = NULL;
4179     if (pcbddc->dbg_flag) {
4180       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4181       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4182     }
4183     if (!pcbddc->coarse_ksp) {
4184       char prefix[256],str_level[16];
4185       size_t len;
4186       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4187       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4188       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4189       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4190       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4191       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4192       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4193       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4194       /* prefix */
4195       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4196       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4197       if (!pcbddc->current_level) {
4198         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4199         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4200       } else {
4201         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4202         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4203         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4204         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4205         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4206         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4207       }
4208       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4209       /* allow user customization */
4210       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4211       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4212     }
4213 
4214     /* get some info after set from options */
4215     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4216     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4217     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4218     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4219     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4220       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4221       isbddc = PETSC_FALSE;
4222     }
4223     if (isredundant) {
4224       KSP inner_ksp;
4225       PC inner_pc;
4226       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4227       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4228       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4229     }
4230 
4231     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4232     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4233     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4234     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4235     if (nisdofs) {
4236       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4237       for (i=0;i<nisdofs;i++) {
4238         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4239       }
4240     }
4241     if (nisneu) {
4242       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4243       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4244     }
4245 
4246     /* assemble coarse matrix */
4247     if (coarse_reuse) {
4248       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4249       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4250       coarse_mat_reuse = MAT_REUSE_MATRIX;
4251     } else {
4252       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4253     }
4254     if (isbddc || isnn) {
4255       if (pcbddc->coarsening_ratio > 1) {
4256         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4257           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4258           if (pcbddc->dbg_flag) {
4259             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4260             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4261             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4262             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4263           }
4264         }
4265         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4266       } else {
4267         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4268         coarse_mat = coarse_mat_is;
4269       }
4270     } else {
4271       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4272     }
4273     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4274 
4275     /* propagate symmetry info to coarse matrix */
4276     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4277     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4278 
4279     /* set operators */
4280     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4281     if (pcbddc->dbg_flag) {
4282       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4283     }
4284   } else { /* processes non partecipating to coarse solver (if any) */
4285     coarse_mat = 0;
4286   }
4287   ierr = PetscFree(isarray);CHKERRQ(ierr);
4288 #if 0
4289   {
4290     PetscViewer viewer;
4291     char filename[256];
4292     sprintf(filename,"coarse_mat.m");
4293     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4294     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4295     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4296     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4297   }
4298 #endif
4299 
4300   /* Compute coarse null space (special handling by BDDC only) */
4301   if (pcbddc->NullSpace) {
4302     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4303   }
4304 
4305   if (pcbddc->coarse_ksp) {
4306     Vec crhs,csol;
4307     PetscBool ispreonly;
4308     if (CoarseNullSpace) {
4309       if (isbddc) {
4310         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4311       } else {
4312         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4313       }
4314     }
4315     /* setup coarse ksp */
4316     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4317     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4318     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4319     /* hack */
4320     if (!csol) {
4321       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4322     }
4323     if (!crhs) {
4324       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4325     }
4326     /* Check coarse problem if in debug mode or if solving with an iterative method */
4327     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4328     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4329       KSP       check_ksp;
4330       KSPType   check_ksp_type;
4331       PC        check_pc;
4332       Vec       check_vec,coarse_vec;
4333       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4334       PetscInt  its;
4335       PetscBool compute_eigs;
4336       PetscReal *eigs_r,*eigs_c;
4337       PetscInt  neigs;
4338       const char *prefix;
4339 
4340       /* Create ksp object suitable for estimation of extreme eigenvalues */
4341       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4342       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4343       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4344       if (ispreonly) {
4345         check_ksp_type = KSPPREONLY;
4346         compute_eigs = PETSC_FALSE;
4347       } else {
4348         check_ksp_type = KSPGMRES;
4349         compute_eigs = PETSC_TRUE;
4350       }
4351       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4352       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4353       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4354       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4355       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4356       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4357       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4358       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4359       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4360       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4361       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4362       /* create random vec */
4363       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4364       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4365       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4366       if (CoarseNullSpace) {
4367         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4368       }
4369       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4370       /* solve coarse problem */
4371       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4372       if (CoarseNullSpace) {
4373         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4374       }
4375       /* set eigenvalue estimation if preonly has not been requested */
4376       if (compute_eigs) {
4377         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4378         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4379         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4380         lambda_max = eigs_r[neigs-1];
4381         lambda_min = eigs_r[0];
4382         if (pcbddc->use_coarse_estimates) {
4383           if (lambda_max>lambda_min) {
4384             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4385             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4386           }
4387         }
4388       }
4389 
4390       /* check coarse problem residual error */
4391       if (pcbddc->dbg_flag) {
4392         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4393         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4394         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4395         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4396         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4397         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4398         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4399         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4400         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4401         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4402         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4403         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4404         if (compute_eigs) {
4405           PetscReal lambda_max_s,lambda_min_s;
4406           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4407           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4408           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4409           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4410           for (i=0;i<neigs;i++) {
4411             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4412           }
4413         }
4414         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4415         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4416       }
4417       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4418       if (compute_eigs) {
4419         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4420         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4421       }
4422     }
4423   }
4424   /* print additional info */
4425   if (pcbddc->dbg_flag) {
4426     /* waits until all processes reaches this point */
4427     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4428     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4429     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4430   }
4431 
4432   /* free memory */
4433   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4434   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4440 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4441 {
4442   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4443   PC_IS*         pcis = (PC_IS*)pc->data;
4444   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4445   PetscInt       i,coarse_size;
4446   PetscInt       *local_primal_indices;
4447   PetscErrorCode ierr;
4448 
4449   PetscFunctionBegin;
4450   /* Compute global number of coarse dofs */
4451   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4452     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4453   }
4454   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4455 
4456   /* check numbering */
4457   if (pcbddc->dbg_flag) {
4458     PetscScalar coarsesum,*array;
4459     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4460 
4461     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4462     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4463     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4464     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4465     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4466     for (i=0;i<pcbddc->local_primal_size;i++) {
4467       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4468     }
4469     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4470     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4471     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4472     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4473     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4474     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4475     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4476     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4477     for (i=0;i<pcis->n;i++) {
4478       if (array[i] == 1.0) {
4479         set_error = PETSC_TRUE;
4480         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4481       }
4482     }
4483     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4484     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4485     for (i=0;i<pcis->n;i++) {
4486       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4487     }
4488     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4489     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4490     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4491     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4492     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4493     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4494     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4495       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4496       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4497       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4498       for (i=0;i<pcbddc->local_primal_size;i++) {
4499         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4500       }
4501       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4502     }
4503     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4504     if (set_error_reduced) {
4505       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4506     }
4507   }
4508   /* get back data */
4509   *coarse_size_n = coarse_size;
4510   *local_primal_indices_n = local_primal_indices;
4511   PetscFunctionReturn(0);
4512 }
4513 
4514 #undef __FUNCT__
4515 #define __FUNCT__ "PCBDDCGlobalToLocal"
4516 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4517 {
4518   IS             localis_t;
4519   PetscInt       i,lsize,*idxs,n;
4520   PetscScalar    *vals;
4521   PetscErrorCode ierr;
4522 
4523   PetscFunctionBegin;
4524   /* get indices in local ordering exploiting local to global map */
4525   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4526   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4527   for (i=0;i<lsize;i++) vals[i] = 1.0;
4528   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4529   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4530   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4531   if (idxs) { /* multilevel guard */
4532     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4533   }
4534   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4535   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4536   ierr = PetscFree(vals);CHKERRQ(ierr);
4537   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4538   /* now compute set in local ordering */
4539   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4540   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4541   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4542   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4543   for (i=0,lsize=0;i<n;i++) {
4544     if (PetscRealPart(vals[i]) > 0.5) {
4545       lsize++;
4546     }
4547   }
4548   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4549   for (i=0,lsize=0;i<n;i++) {
4550     if (PetscRealPart(vals[i]) > 0.5) {
4551       idxs[lsize++] = i;
4552     }
4553   }
4554   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4555   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4556   *localis = localis_t;
4557   PetscFunctionReturn(0);
4558 }
4559 
4560 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4561 #undef __FUNCT__
4562 #define __FUNCT__ "PCBDDCMatMult_Private"
4563 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4564 {
4565   PCBDDCChange_ctx change_ctx;
4566   PetscErrorCode   ierr;
4567 
4568   PetscFunctionBegin;
4569   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4570   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4571   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4572   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4573   PetscFunctionReturn(0);
4574 }
4575 
4576 #undef __FUNCT__
4577 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4578 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4579 {
4580   PCBDDCChange_ctx change_ctx;
4581   PetscErrorCode   ierr;
4582 
4583   PetscFunctionBegin;
4584   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4585   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4586   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4587   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4588   PetscFunctionReturn(0);
4589 }
4590 
4591 #undef __FUNCT__
4592 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4593 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4594 {
4595   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4596   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4597   PetscInt            *used_xadj,*used_adjncy;
4598   PetscBool           free_used_adj;
4599   PetscErrorCode      ierr;
4600 
4601   PetscFunctionBegin;
4602   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4603   free_used_adj = PETSC_FALSE;
4604   if (pcbddc->sub_schurs_layers == -1) {
4605     used_xadj = NULL;
4606     used_adjncy = NULL;
4607   } else {
4608     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4609       used_xadj = pcbddc->mat_graph->xadj;
4610       used_adjncy = pcbddc->mat_graph->adjncy;
4611     } else if (pcbddc->computed_rowadj) {
4612       used_xadj = pcbddc->mat_graph->xadj;
4613       used_adjncy = pcbddc->mat_graph->adjncy;
4614     } else {
4615       Mat            mat_adj;
4616       PetscBool      flg_row=PETSC_TRUE;
4617       const PetscInt *xadj,*adjncy;
4618       PetscInt       nvtxs;
4619 
4620       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4621       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4622       if (!flg_row) {
4623         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4624       }
4625       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4626       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4627       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4628       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4629       if (!flg_row) {
4630         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4631       }
4632       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4633       free_used_adj = PETSC_TRUE;
4634     }
4635   }
4636   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4637 
4638   /* free adjacency */
4639   if (free_used_adj) {
4640     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4641   }
4642   PetscFunctionReturn(0);
4643 }
4644 
4645 #undef __FUNCT__
4646 #define __FUNCT__ "PCBDDCInitSubSchurs"
4647 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4648 {
4649   PC_IS               *pcis=(PC_IS*)pc->data;
4650   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4651   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4652   PCBDDCGraph         graph;
4653   Mat                 S_j;
4654   PetscErrorCode      ierr;
4655 
4656   PetscFunctionBegin;
4657   /* attach interface graph for determining subsets */
4658   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4659     IS verticesIS;
4660 
4661     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4662     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4663     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4664     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4665     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4666     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4667 /*
4668     if (pcbddc->dbg_flag) {
4669       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4670     }
4671 */
4672   } else {
4673     graph = pcbddc->mat_graph;
4674   }
4675 
4676   /* Create Schur complement matrix */
4677   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4678   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4679 
4680   /* sub_schurs init */
4681   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4682   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4683   /* free graph struct */
4684   if (pcbddc->sub_schurs_rebuild) {
4685     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4686   }
4687   PetscFunctionReturn(0);
4688 }
4689