xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 3975b054dc2ded1a903f339d6a1c859863e3c05f)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > -1 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       cum += subset_size*n;
66       cum2 += n;
67       maxneigs = PetscMax(maxneigs,n);
68     }
69   }
70 
71   if (mss) {
72     if (symmetric) {
73       PetscBLASInt B_itype = 1;
74       PetscBLASInt B_N = mss;
75       PetscScalar  zero = 0.0;
76       PetscScalar  eps = 0.0; /* dlamch? */
77 
78       B_lwork = -1;
79       S = NULL;
80       St = NULL;
81       eigs = NULL;
82       eigv = NULL;
83       B_iwork = NULL;
84       B_ifail = NULL;
85       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
86 #if defined(PETSC_USE_COMPLEX)
87       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
88 #else
89       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
90 #endif
91       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
92       ierr = PetscFPTrapPop();CHKERRQ(ierr);
93     } else {
94         /* TODO */
95     }
96   } else {
97     lwork = 0;
98   }
99 
100   nv = 0;
101   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
102     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
103   }
104   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
105   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
106                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
107 #if defined(PETSC_USE_COMPLEX)
108   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
109 #endif
110   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
111   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
112                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
113                       nv+cum,&pcbddc->adaptive_constraints_idxs,
114                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
115   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
116 
117   maxneigs = 0;
118   cum = cum2 = cumarray = 0;
119   if (sub_schurs->is_Ej_com) {
120     const PetscInt *idxs;
121 
122     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
123     for (cum=0;cum<nv;cum++) {
124       pcbddc->adaptive_constraints_n[cum] = 1;
125       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
126       pcbddc->adaptive_constraints_ptrs[cum] = cum;
127       pcbddc->adaptive_constraints_data[cum] = 1.0;
128     }
129     cum2 = cum;
130     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
131   }
132 
133   if (mss) { /* multilevel */
134     if (pcbddc->use_deluxe_scaling) {
135       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
136     } else {
137       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
138     }
139     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
140   }
141 
142   for (i=0;i<sub_schurs->n_subs;i++) {
143     PetscInt j,subset_size;
144 
145     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
146     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
147       const PetscInt *idxs;
148       PetscScalar    one = 1.0,zero=0.0;
149       PetscBLASInt   B_N;
150 
151       /* S should be copied since we need it for deluxe scaling */
152       if (symmetric) {
153         PetscInt j;
154         for (j=0;j<subset_size;j++) {
155           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
156         }
157         for (j=0;j<subset_size;j++) {
158           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
159         }
160       } else {
161         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
162         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163       }
164       /* is always this the right matrix? */
165       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
166 
167       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
168       /* St = Starray+cumarray; */
169 
170       if (symmetric) {
171         PetscBLASInt B_itype = 1;
172         PetscBLASInt B_IL = 1, B_IU;
173         PetscScalar  eps = -1.0; /* dlamch? */
174         PetscInt     nmin_s;
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         nmin_s = PetscMin(nmin,B_N);
202         if (B_neigs < nmin_s) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin_s,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
251 
252       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
253       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
254       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
255 #if 0
256       PetscInt ii;
257       for (ii=0;ii<B_neigs;ii++) {
258         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Eigenvector %d/%d (%d)\n",PetscGlobalRank,ii,B_neigs,B_N);
259         for (j=0;j<B_N;j++) {
260           PetscPrintf(PETSC_COMM_SELF,"[%d]       %1.4e %1.4e\n",PetscGlobalRank,eigv[ii*B_N+j],Seigv[ii*B_N+j]);
261         }
262       }
263 #endif
264       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
265       for (j=0;j<B_neigs;j++) {
266 #if 0
267         {
268           PetscBLASInt Blas_N,Blas_one = 1.0;
269           PetscScalar norm;
270           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
271           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
272           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
273             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
274           } else {
275             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
276           }
277           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
278         }
279 #endif
280         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
281         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
282         cum2 += subset_size;
283       }
284       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
285     }
286     /* shift for next computation */
287     cumarray += subset_size*subset_size;
288   }
289   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
290   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
291 
292   if (mss) {
293     if (pcbddc->use_deluxe_scaling) {
294       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
295     } else {
296       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
297     }
298     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
299   }
300   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
301 #if defined(PETSC_USE_COMPLEX)
302   ierr = PetscFree(rwork);CHKERRQ(ierr);
303 #endif
304   if (pcbddc->dbg_flag) {
305     PetscInt maxneigs_r;
306     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
307     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
308   }
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "PCBDDCSetUpSolvers"
314 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
315 {
316   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
317   PetscScalar    *coarse_submat_vals;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   /* Setup local scatters R_to_B and (optionally) R_to_D */
322   /* PCBDDCSetUpLocalWorkVectors should be called first! */
323   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
324 
325   /* Setup local neumann solver ksp_R */
326   /* PCBDDCSetUpLocalScatters should be called first! */
327   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
328 
329   /* Change global null space passed in by the user if change of basis has been requested */
330   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
331     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
332   }
333 
334   /*
335      Setup local correction and local part of coarse basis.
336      Gives back the dense local part of the coarse matrix in column major ordering
337   */
338   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
339 
340   /* Compute total number of coarse nodes and setup coarse solver */
341   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
342 
343   /* free */
344   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "PCBDDCResetCustomization"
350 PetscErrorCode PCBDDCResetCustomization(PC pc)
351 {
352   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
353   PetscErrorCode ierr;
354 
355   PetscFunctionBegin;
356   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
357   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
358   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
359   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
360   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
361   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
362   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
363   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
364   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
365   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
366   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetTopography"
372 PetscErrorCode PCBDDCResetTopography(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
379   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
380   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
381   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
382   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 #undef __FUNCT__
387 #define __FUNCT__ "PCBDDCResetSolvers"
388 PetscErrorCode PCBDDCResetSolvers(PC pc)
389 {
390   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
391   PetscScalar    *array;
392   PetscErrorCode ierr;
393 
394   PetscFunctionBegin;
395   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
396   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
397   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
398   ierr = PetscFree(array);CHKERRQ(ierr);
399   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
404   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
405   if (pcbddc->local_auxmat2) {
406     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
407     ierr = PetscFree(array);CHKERRQ(ierr);
408   }
409   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
411   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
412   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
414   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
415   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
416   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
417   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
418   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
419   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
420   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
421   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
422   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
423   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
424   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
431 {
432   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
433   PC_IS          *pcis = (PC_IS*)pc->data;
434   VecType        impVecType;
435   PetscInt       n_constraints,n_R,old_size;
436   PetscErrorCode ierr;
437 
438   PetscFunctionBegin;
439   if (!pcbddc->ConstraintMatrix) {
440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
441   }
442   /* get sizes */
443   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
444   n_R = pcis->n-pcbddc->n_actual_vertices;
445   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
446   /* local work vectors (try to avoid unneeded work)*/
447   /* R nodes */
448   old_size = -1;
449   if (pcbddc->vec1_R) {
450     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
451   }
452   if (n_R != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
454     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
455     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
456     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
457     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
458     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
459   }
460   /* local primal dofs */
461   old_size = -1;
462   if (pcbddc->vec1_P) {
463     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
464   }
465   if (pcbddc->local_primal_size != old_size) {
466     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
467     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
468     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
469     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
470   }
471   /* local explicit constraints */
472   old_size = -1;
473   if (pcbddc->vec1_C) {
474     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
475   }
476   if (n_constraints && n_constraints != old_size) {
477     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
481   }
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "PCBDDCSetUpCorrection"
487 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
488 {
489   PetscErrorCode         ierr;
490   /* pointers to pcis and pcbddc */
491   PC_IS*                 pcis = (PC_IS*)pc->data;
492   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
493   /* submatrices of local problem */
494   Mat                    A_RV,A_VR,A_VV;
495   /* submatrices of local coarse problem */
496   Mat                    S_VV,S_CV,S_VC,S_CC;
497   /* working matrices */
498   Mat                    C_CR;
499   /* additional working stuff */
500   PC                     pc_R;
501   Mat                    F;
502   PetscBool              isLU,isCHOL,isILU;
503 
504   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
505   PetscScalar            *work;
506   PetscInt               *idx_V_B;
507   PetscInt               n,n_vertices,n_constraints;
508   PetscInt               i,n_R,n_D,n_B;
509   PetscBool              unsymmetric_check;
510   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
511   MatType                impMatType;
512   /* some shortcuts to scalars */
513   PetscScalar            one=1.0,m_one=-1.0;
514 
515   PetscFunctionBegin;
516   /* get number of vertices (corners plus constraints with change of basis)
517      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
518   n_vertices = pcbddc->n_actual_vertices;
519   n_constraints = pcbddc->local_primal_size-n_vertices;
520   /* Set Non-overlapping dimensions */
521   n_B = pcis->n_B; n_D = pcis->n - n_B;
522   n_R = pcis->n-n_vertices;
523 
524   /* Set types for local objects needed by BDDC precondtioner */
525   impMatType = MATSEQDENSE;
526 
527   /* vertices in boundary numbering */
528   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
529   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
530   if (i != n_vertices) {
531     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
532   }
533 
534   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
535   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
536   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
537   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
538   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
539   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
540   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
541   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
542   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
543   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
544 
545   unsymmetric_check = PETSC_FALSE;
546   /* allocate workspace */
547   n = 0;
548   if (n_constraints) {
549     n += n_R*n_constraints;
550   }
551   if (n_vertices) {
552     n = PetscMax(2*n_R*n_vertices,n);
553   }
554   if (!pcbddc->issym) {
555     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
556     unsymmetric_check = PETSC_TRUE;
557   }
558   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
559 
560   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
561   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
562   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
563   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
564   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
565   if (isLU || isILU || isCHOL) {
566     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
567   } else {
568     F = NULL;
569   }
570 
571   /* Precompute stuffs needed for preprocessing and application of BDDC*/
572   if (n_constraints) {
573     Mat M1,M2,M3;
574     IS  is_aux;
575     /* see if we can save some allocations */
576     if (pcbddc->local_auxmat2) {
577       PetscInt on_R,on_constraints;
578       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
579       if (on_R != n_R || on_constraints != n_constraints) {
580         PetscScalar *marray;
581 
582         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
583         ierr = PetscFree(marray);CHKERRQ(ierr);
584         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
585         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
586       }
587     }
588     /* auxiliary matrices */
589     if (!pcbddc->local_auxmat2) {
590       PetscScalar *marray;
591 
592       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
593       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
594       marray += n_R*n_constraints;
595       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
596     }
597 
598     /* Extract constraints on R nodes: C_{CR}  */
599     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
600     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
601     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
602 
603     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
604     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
605     for (i=0;i<n_constraints;i++) {
606       const PetscScalar *row_cmat_values;
607       const PetscInt    *row_cmat_indices;
608       PetscInt          size_of_constraint,j;
609 
610       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
611       for (j=0;j<size_of_constraint;j++) {
612         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
613       }
614       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
615     }
616     if (F) {
617       Mat B;
618 
619       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
620       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
621       ierr = MatDestroy(&B);CHKERRQ(ierr);
622     } else {
623       PetscScalar *xarray;
624       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
625       for (i=0;i<n_constraints;i++) {
626         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
627         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
628         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
629         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
630         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
631       }
632       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
633     }
634 
635     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
636     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
637     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
638     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
639     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
640     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
641     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
642     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
643     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
644     ierr = MatDestroy(&M2);CHKERRQ(ierr);
645     ierr = MatDestroy(&M3);CHKERRQ(ierr);
646     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
647     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
648     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650   }
651 
652   /* Get submatrices from subdomain matrix */
653   if (n_vertices) {
654     Mat       newmat;
655     IS        is_aux;
656     PetscInt  ibs,mbs;
657     PetscBool issbaij;
658 
659     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
660     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
661     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
662     if (ibs != mbs) { /* need to convert to SEQAIJ */
663       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
664       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
665       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
666       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
667       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
668     } else {
669       /* this is safe */
670       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
671       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
672       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
673         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
674         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
675         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
676         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
677       } else {
678         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
679         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
680       }
681     }
682     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
683   }
684 
685   /* Matrix of coarse basis functions (local) */
686   if (pcbddc->coarse_phi_B) {
687     PetscInt on_B,on_primal,on_D=n_D;
688     if (pcbddc->coarse_phi_D) {
689       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
690     }
691     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
692     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
693       PetscScalar *marray;
694 
695       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
696       ierr = PetscFree(marray);CHKERRQ(ierr);
697       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
698       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
699       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
700       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
701     }
702   }
703 
704   if (!pcbddc->coarse_phi_B) {
705     PetscScalar *marray;
706 
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       n *= 2;
713     }
714     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
715     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
716     n = n_B*pcbddc->local_primal_size;
717     if (pcbddc->switch_static || pcbddc->dbg_flag) {
718       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
719       n += n_D*pcbddc->local_primal_size;
720     }
721     if (!pcbddc->issym) {
722       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
723       if (pcbddc->switch_static || pcbddc->dbg_flag) {
724         n = n_B*pcbddc->local_primal_size;
725         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
726       }
727     } else {
728       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
729       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
730       if (pcbddc->switch_static || pcbddc->dbg_flag) {
731         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
732         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
733       }
734     }
735   }
736   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
737   /* vertices */
738   if (n_vertices) {
739     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
740     PetscScalar  *x,*y;
741     PetscBLASInt B_N,B_one = 1;
742 
743     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
744     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
745     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
746     if (F) {
747       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
748     } else {
749       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
750       for (i=0;i<n_vertices;i++) {
751         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
752         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
753         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
754         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
755         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
756       }
757       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
758     }
759     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
760     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
761     if (n_constraints) {
762       Mat B;
763       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
764       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
765       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
766       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
767       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
768       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
769       ierr = MatDestroy(&B);CHKERRQ(ierr);
770     }
771     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
772     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
773     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
774     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
775     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
776     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
777     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
778     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
779     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
780     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
781     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
782     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
783     /* coarse basis functions */
784     for (i=0;i<n_vertices;i++) {
785       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
786       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
787       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
788       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
790       y[n_B*i+idx_V_B[i]] = 1.0;
791       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
792       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
793 
794       if (pcbddc->switch_static || pcbddc->dbg_flag) {
795         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
796         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
797         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
799         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
800         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
801       }
802       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
803     }
804     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
805     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
806   }
807 
808   if (n_constraints) {
809     Mat B;
810 
811     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
812     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
813     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
814     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
815     if (n_vertices) {
816       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
817     }
818     ierr = MatDestroy(&B);CHKERRQ(ierr);
819     /* coarse basis functions */
820     for (i=0;i<n_constraints;i++) {
821       PetscScalar *y;
822 
823       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
824       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
825       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
826       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
829       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
830 
831       if (pcbddc->switch_static || pcbddc->dbg_flag) {
832         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
833         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
834         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
837         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
838       }
839       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
840     }
841   }
842 
843   /* compute other basis functions for non-symmetric problems */
844   if (!pcbddc->issym) {
845     Mat B,X;
846 
847     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
848 
849     if (n_constraints) {
850       Mat S_VCT,S_CCT,B_C;
851 
852       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
853       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
854       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
855       if (n_vertices) {
856         Mat B_V;
857 
858         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
859         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
860         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
861         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
862       }
863       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
864     }
865     if (n_vertices) {
866       Mat          A_VRT;
867       PetscBLASInt B_N,B_one = 1;
868 
869       if (!n_constraints) { /* if there are no constraints, reset work */
870         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
871       }
872       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
873       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
874       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
875       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
876     }
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
879     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
880       for (i=0;i<pcbddc->local_primal_size;i++) {
881         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
882         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
883         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
884         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
885         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
886       }
887     } else {
888       for (i=0;i<pcbddc->local_primal_size;i++) {
889         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
890         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
891         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
892         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
893         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
894       }
895     }
896     ierr = MatDestroy(&B);CHKERRQ(ierr);
897     /* coarse basis functions */
898     for (i=0;i<pcbddc->local_primal_size;i++) {
899       PetscScalar *y;
900 
901       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
902       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
903       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
904       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906       if (i<n_vertices) {
907         y[n_B*i+idx_V_B[i]] = 1.0;
908       }
909       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
910       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
911 
912       if (pcbddc->switch_static || pcbddc->dbg_flag) {
913         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
914         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
915         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
918         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
919       }
920       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
921     }
922     ierr = MatDestroy(&X);CHKERRQ(ierr);
923   }
924 
925   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
926   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
927   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
928   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
929   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
930   /* Checking coarse_sub_mat and coarse basis functios */
931   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
932   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
933   if (pcbddc->dbg_flag) {
934     Mat         coarse_sub_mat;
935     Mat         AUXMAT,TM1,TM2,TM3,TM4;
936     Mat         coarse_phi_D,coarse_phi_B;
937     Mat         coarse_psi_D,coarse_psi_B;
938     Mat         A_II,A_BB,A_IB,A_BI;
939     MatType     checkmattype=MATSEQAIJ;
940     PetscReal   real_value;
941 
942     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
943     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
944     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
945     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
946     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
947     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
948     if (unsymmetric_check) {
949       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
950       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
951     }
952     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
953 
954     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
955     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
956     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
957     if (unsymmetric_check) {
958       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
959       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
960       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
961       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
962       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
963       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
964       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
965       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
966       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
967       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
968       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
969       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
970     } else {
971       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
972       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
973       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
974       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
975       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
976       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
977       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
978       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
979     }
980     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
981     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
982     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
983     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
984     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
985     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
986 
987     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
988     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
989     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
990     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
991     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
992     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
993     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
994     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
995     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
996     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
997     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
998     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
999     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1000     if (unsymmetric_check) {
1001       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1002       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1003     }
1004     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1005   }
1006 
1007   /* free memory */
1008   ierr = PetscFree(work);CHKERRQ(ierr);
1009   if (n_vertices) {
1010     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1011   }
1012   if (n_constraints) {
1013     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1014   }
1015   /* get back data */
1016   *coarse_submat_vals_n = coarse_submat_vals;
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1022 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1023 {
1024   Mat            *work_mat;
1025   IS             isrow_s,iscol_s;
1026   PetscBool      rsorted,csorted;
1027   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1028   PetscErrorCode ierr;
1029 
1030   PetscFunctionBegin;
1031   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1032   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1033   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1034   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1035 
1036   if (!rsorted) {
1037     const PetscInt *idxs;
1038     PetscInt *idxs_sorted,i;
1039 
1040     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1041     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1042     for (i=0;i<rsize;i++) {
1043       idxs_perm_r[i] = i;
1044     }
1045     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1046     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1047     for (i=0;i<rsize;i++) {
1048       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1049     }
1050     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1051     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1052   } else {
1053     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1054     isrow_s = isrow;
1055   }
1056 
1057   if (!csorted) {
1058     if (isrow == iscol) {
1059       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1060       iscol_s = isrow_s;
1061     } else {
1062       const PetscInt *idxs;
1063       PetscInt *idxs_sorted,i;
1064 
1065       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1066       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1067       for (i=0;i<csize;i++) {
1068         idxs_perm_c[i] = i;
1069       }
1070       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1071       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1072       for (i=0;i<csize;i++) {
1073         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1074       }
1075       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1076       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1077     }
1078   } else {
1079     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1080     iscol_s = iscol;
1081   }
1082 
1083   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1084 
1085   if (!rsorted || !csorted) {
1086     Mat      new_mat;
1087     IS       is_perm_r,is_perm_c;
1088 
1089     if (!rsorted) {
1090       PetscInt *idxs_r,i;
1091       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1092       for (i=0;i<rsize;i++) {
1093         idxs_r[idxs_perm_r[i]] = i;
1094       }
1095       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1096       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1097     } else {
1098       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1099     }
1100     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1101 
1102     if (!csorted) {
1103       if (isrow_s == iscol_s) {
1104         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1105         is_perm_c = is_perm_r;
1106       } else {
1107         PetscInt *idxs_c,i;
1108         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1109         for (i=0;i<csize;i++) {
1110           idxs_c[idxs_perm_c[i]] = i;
1111         }
1112         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1113         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1114       }
1115     } else {
1116       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1117     }
1118     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1119 
1120     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1121     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1122     work_mat[0] = new_mat;
1123     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1124     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1125   }
1126 
1127   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1128   *B = work_mat[0];
1129   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1130   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1131   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1137 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1138 {
1139   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1140   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1141   Mat            new_mat;
1142   IS             is_local,is_global;
1143   PetscInt       local_size;
1144   PetscBool      isseqaij;
1145   PetscErrorCode ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1149   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1150   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1151   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1152   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1153   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1154   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1155 
1156   /* check */
1157   if (pcbddc->dbg_flag) {
1158     Vec       x,x_change;
1159     PetscReal error;
1160 
1161     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1162     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1163     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1164     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1167     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1168     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1169     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1170     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1171     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1172     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1173     ierr = VecDestroy(&x);CHKERRQ(ierr);
1174     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1175   }
1176 
1177   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1178   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1179   if (isseqaij) {
1180     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1181   } else {
1182     Mat work_mat;
1183     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1184     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1185     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1186   }
1187   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1188   /*
1189   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1190   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1191   */
1192   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1198 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1199 {
1200   PC_IS*         pcis = (PC_IS*)(pc->data);
1201   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1202   IS             is_aux1,is_aux2;
1203   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1204   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1205   PetscInt       vbs,bs;
1206   PetscBT        bitmask;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   /*
1211     No need to setup local scatters if
1212       - primal space is unchanged
1213         AND
1214       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1215         AND
1216       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1217   */
1218   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1219     PetscFunctionReturn(0);
1220   }
1221   /* destroy old objects */
1222   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1223   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1224   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1225   /* Set Non-overlapping dimensions */
1226   n_B = pcis->n_B; n_D = pcis->n - n_B;
1227   n_vertices = pcbddc->n_actual_vertices;
1228   /* create auxiliary bitmask */
1229   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1230   for (i=0;i<n_vertices;i++) {
1231     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1232   }
1233 
1234   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1235   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1236   for (i=0, n_R=0; i<pcis->n; i++) {
1237     if (!PetscBTLookup(bitmask,i)) {
1238       idx_R_local[n_R] = i;
1239       n_R++;
1240     }
1241   }
1242 
1243   /* Block code */
1244   vbs = 1;
1245   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1246   if (bs>1 && !(n_vertices%bs)) {
1247     PetscBool is_blocked = PETSC_TRUE;
1248     PetscInt  *vary;
1249     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1250     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1251     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1252     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1253     for (i=0; i<n_vertices; i++) {
1254       if (vary[i]!=0 && vary[i]!=bs) {
1255         is_blocked = PETSC_FALSE;
1256         break;
1257       }
1258     }
1259     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1260       vbs = bs;
1261       for (i=0;i<n_R/vbs;i++) {
1262         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1263       }
1264     }
1265     ierr = PetscFree(vary);CHKERRQ(ierr);
1266   }
1267   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1268   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1269 
1270   /* print some info if requested */
1271   if (pcbddc->dbg_flag) {
1272     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1273     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1274     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1275     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1276     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1277     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1278     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1279     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1280   }
1281 
1282   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1283   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1284   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1285   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1286   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1287   for (i=0; i<n_D; i++) {
1288     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1289   }
1290   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1291   for (i=0, j=0; i<n_R; i++) {
1292     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1293       aux_array1[j++] = i;
1294     }
1295   }
1296   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1297   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1298   for (i=0, j=0; i<n_B; i++) {
1299     if (!PetscBTLookup(bitmask,is_indices[i])) {
1300       aux_array2[j++] = i;
1301     }
1302   }
1303   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1304   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1305   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1306   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1307   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1308 
1309   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1310     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1311     for (i=0, j=0; i<n_R; i++) {
1312       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1313         aux_array1[j++] = i;
1314       }
1315     }
1316     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1317     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1318     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1319   }
1320   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1321   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1328 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1329 {
1330   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1331   PC_IS          *pcis = (PC_IS*)pc->data;
1332   PC             pc_temp;
1333   Mat            A_RR;
1334   MatReuse       reuse;
1335   PetscScalar    m_one = -1.0;
1336   PetscReal      value;
1337   PetscInt       n_D,n_R,ibs,mbs;
1338   PetscBool      use_exact,use_exact_reduced,issbaij;
1339   PetscErrorCode ierr;
1340   /* prefixes stuff */
1341   char           dir_prefix[256],neu_prefix[256],str_level[16];
1342   size_t         len;
1343 
1344   PetscFunctionBegin;
1345 
1346   /* compute prefixes */
1347   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1348   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1349   if (!pcbddc->current_level) {
1350     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1351     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1352     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1353     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1354   } else {
1355     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1356     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1357     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1358     len -= 15; /* remove "pc_bddc_coarse_" */
1359     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1360     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1361     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1362     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1363     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1364     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1365     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1366     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1367   }
1368 
1369   /* DIRICHLET PROBLEM */
1370   if (dirichlet) {
1371     /* Matrix for Dirichlet problem is pcis->A_II */
1372     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1373     if (!pcbddc->ksp_D) { /* create object if not yet build */
1374       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1375       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1376       /* default */
1377       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1378       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1379       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1380       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1381       if (issbaij) {
1382         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1383       } else {
1384         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1385       }
1386       /* Allow user's customization */
1387       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1388       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1389     }
1390     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1391     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1392     if (!n_D) {
1393       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1394       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1395     }
1396     /* Set Up KSP for Dirichlet problem of BDDC */
1397     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1398     /* set ksp_D into pcis data */
1399     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1400     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1401     pcis->ksp_D = pcbddc->ksp_D;
1402   }
1403 
1404   /* NEUMANN PROBLEM */
1405   A_RR = 0;
1406   if (neumann) {
1407     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1408     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1409     if (pcbddc->ksp_R) { /* already created ksp */
1410       PetscInt nn_R;
1411       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1412       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1413       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1414       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1415         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1416         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1417         reuse = MAT_INITIAL_MATRIX;
1418       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1419         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1420           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1421           reuse = MAT_INITIAL_MATRIX;
1422         } else { /* safe to reuse the matrix */
1423           reuse = MAT_REUSE_MATRIX;
1424         }
1425       }
1426       /* last check */
1427       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1428         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1429         reuse = MAT_INITIAL_MATRIX;
1430       }
1431     } else { /* first time, so we need to create the matrix */
1432       reuse = MAT_INITIAL_MATRIX;
1433     }
1434     /* extract A_RR */
1435     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1436     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1437     if (ibs != mbs) {
1438       Mat newmat;
1439       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1440       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1441       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1442     } else {
1443       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1444     }
1445     if (!pcbddc->ksp_R) { /* create object if not present */
1446       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1447       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1448       /* default */
1449       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1450       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1451       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1452       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1453       if (issbaij) {
1454         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1455       } else {
1456         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1457       }
1458       /* Allow user's customization */
1459       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1460       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1461     }
1462     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1463     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1464     if (!n_R) {
1465       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1466       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1467     }
1468     /* Set Up KSP for Neumann problem of BDDC */
1469     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1470   }
1471 
1472   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1473   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1474     if (pcbddc->dbg_flag) {
1475       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1476       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1478     }
1479     if (dirichlet) { /* Dirichlet */
1480       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1481       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1482       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1483       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1484       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1485       /* need to be adapted? */
1486       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1487       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1488       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1489       /* print info */
1490       if (pcbddc->dbg_flag) {
1491         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1492         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1493       }
1494       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1495         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1496       }
1497     }
1498     if (neumann) { /* Neumann */
1499       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1500       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1501       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1502       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1503       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1504       /* need to be adapted? */
1505       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1506       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1507       /* print info */
1508       if (pcbddc->dbg_flag) {
1509         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1510         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1511       }
1512       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1513         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1514       }
1515     }
1516   }
1517   /* free Neumann problem's matrix */
1518   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1524 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1525 {
1526   PetscErrorCode ierr;
1527   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1528 
1529   PetscFunctionBegin;
1530   if (applytranspose) {
1531     if (pcbddc->local_auxmat1) {
1532       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1533       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1534     }
1535     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1536   } else {
1537     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1538     if (pcbddc->local_auxmat1) {
1539       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1540       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1547 #undef __FUNCT__
1548 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1549 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1550 {
1551   PetscErrorCode ierr;
1552   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1553   PC_IS*            pcis = (PC_IS*)  (pc->data);
1554   const PetscScalar zero = 0.0;
1555 
1556   PetscFunctionBegin;
1557   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1558   if (applytranspose) {
1559     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1560     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1561   } else {
1562     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1563     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1564   }
1565   /* start communications from local primal nodes to rhs of coarse solver */
1566   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1567   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1568   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1569 
1570   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1571   /* TODO remove null space when doing multilevel */
1572   if (pcbddc->coarse_ksp) {
1573     if (applytranspose) {
1574       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1575     } else {
1576       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1577     }
1578   }
1579 
1580   /* Local solution on R nodes */
1581   if (pcis->n) {
1582     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1583     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1584     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1585     if (pcbddc->switch_static) {
1586       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1587       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1588     }
1589     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1590     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1591     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1592     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1593     if (pcbddc->switch_static) {
1594       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1595       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1596     }
1597   }
1598 
1599   /* communications from coarse sol to local primal nodes */
1600   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1601   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1602 
1603   /* Sum contributions from two levels */
1604   if (applytranspose) {
1605     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1606     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1607   } else {
1608     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1609     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1615 #undef __FUNCT__
1616 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1617 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1618 {
1619   PetscErrorCode ierr;
1620   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1621   PetscScalar    *array,*array2;
1622   Vec            from,to;
1623 
1624   PetscFunctionBegin;
1625   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1626     from = pcbddc->coarse_vec;
1627     to = pcbddc->vec1_P;
1628     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1629       Vec tvec;
1630       PetscInt lsize;
1631       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1632       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1633       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1634       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1635       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1636       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1637       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1638     }
1639   } else { /* from local to global -> put data in coarse right hand side */
1640     from = pcbddc->vec1_P;
1641     to = pcbddc->coarse_vec;
1642   }
1643   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1649 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1650 {
1651   PetscErrorCode ierr;
1652   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1653   PetscScalar    *array,*array2;
1654   Vec            from,to;
1655 
1656   PetscFunctionBegin;
1657   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1658     from = pcbddc->coarse_vec;
1659     to = pcbddc->vec1_P;
1660   } else { /* from local to global -> put data in coarse right hand side */
1661     from = pcbddc->vec1_P;
1662     to = pcbddc->coarse_vec;
1663   }
1664   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1665   if (smode == SCATTER_FORWARD) {
1666     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1667       Vec tvec;
1668       PetscInt lsize;
1669       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1670       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1671       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1672       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1673       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1674       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1675       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1676     }
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 /* uncomment for testing purposes */
1682 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1683 #undef __FUNCT__
1684 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1685 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1686 {
1687   PetscErrorCode    ierr;
1688   PC_IS*            pcis = (PC_IS*)(pc->data);
1689   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1690   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1691   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1692   MatType           impMatType=MATSEQAIJ;
1693   /* one and zero */
1694   PetscScalar       one=1.0,zero=0.0;
1695   /* space to store constraints and their local indices */
1696   PetscScalar       *temp_quadrature_constraint;
1697   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1698   /* iterators */
1699   PetscInt          i,j,k,total_counts,temp_start_ptr;
1700   /* stuff to store connected components stored in pcbddc->mat_graph */
1701   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1702   PetscInt          n_ISForFaces,n_ISForEdges;
1703   /* near null space stuff */
1704   MatNullSpace      nearnullsp;
1705   const Vec         *nearnullvecs;
1706   Vec               *localnearnullsp;
1707   PetscBool         nnsp_has_cnst;
1708   PetscInt          nnsp_size;
1709   PetscScalar       *array;
1710   /* BLAS integers */
1711   PetscBLASInt      lwork,lierr;
1712   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1713   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1714   /* LAPACK working arrays for SVD or POD */
1715   PetscBool         skip_lapack;
1716   PetscScalar       *work;
1717   PetscReal         *singular_vals;
1718 #if defined(PETSC_USE_COMPLEX)
1719   PetscReal         *rwork;
1720 #endif
1721 #if defined(PETSC_MISSING_LAPACK_GESVD)
1722   PetscBLASInt      Blas_one_2=1;
1723   PetscScalar       *temp_basis,*correlation_mat;
1724 #else
1725   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1726   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1727 #endif
1728   /* reuse */
1729   PetscInt          olocal_primal_size;
1730   PetscInt          *oprimal_indices_local_idxs;
1731   /* change of basis */
1732   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1733   PetscBool         boolforchange,qr_needed;
1734   PetscBT           touched,change_basis,qr_needed_idx;
1735   /* auxiliary stuff */
1736   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1737   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1738   PetscScalar       *temp_quadrature_constraint_work=NULL;
1739   /* some quantities */
1740   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1741   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1742 
1743 
1744   PetscFunctionBegin;
1745   /* Destroy Mat objects computed previously */
1746   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1747   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1748   /* TODO synch with adaptive selection */
1749   /* Get index sets for faces, edges and vertices from graph */
1750   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1751   /* free unneeded index sets */
1752   if (!pcbddc->use_vertices) {
1753     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1754   }
1755   if (!pcbddc->use_edges) {
1756     for (i=0;i<n_ISForEdges;i++) {
1757       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1758     }
1759     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1760     n_ISForEdges = 0;
1761   }
1762   if (!pcbddc->use_faces) {
1763     for (i=0;i<n_ISForFaces;i++) {
1764       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1765     }
1766     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1767     n_ISForFaces = 0;
1768   }
1769   /* HACKS (the following two blocks of code) */
1770   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1771     pcbddc->use_change_of_basis = PETSC_TRUE;
1772     if (!ISForEdges) {
1773       pcbddc->use_change_on_faces = PETSC_TRUE;
1774     }
1775   }
1776   if (pcbddc->NullSpace) {
1777     /* use_change_of_basis should be consistent among processors */
1778     PetscBool tbool[2],gbool[2];
1779     tbool [0] = pcbddc->use_change_of_basis;
1780     tbool [1] = pcbddc->use_change_on_faces;
1781     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1782     pcbddc->use_change_of_basis = gbool[0];
1783     pcbddc->use_change_on_faces = gbool[1];
1784   }
1785   /* print some info */
1786   if (pcbddc->dbg_flag) {
1787     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1788     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1789     i = 0;
1790     if (ISForVertices) {
1791       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1792     }
1793     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1794     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1795     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1796     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1797   }
1798 
1799   if (!pcbddc->adaptive_selection) {
1800     /* check if near null space is attached to global mat */
1801     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1802     if (nearnullsp) {
1803       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1804       /* remove any stored info */
1805       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1806       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1807       /* store information for BDDC solver reuse */
1808       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1809       pcbddc->onearnullspace = nearnullsp;
1810       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1811       for (i=0;i<nnsp_size;i++) {
1812         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1813       }
1814     } else { /* if near null space is not provided BDDC uses constants by default */
1815       nnsp_size = 0;
1816       nnsp_has_cnst = PETSC_TRUE;
1817     }
1818     /* get max number of constraints on a single cc */
1819     max_constraints = nnsp_size;
1820     if (nnsp_has_cnst) max_constraints++;
1821 
1822     /*
1823          Evaluate maximum storage size needed by the procedure
1824          - temp_indices will contain start index of each constraint stored as follows
1825          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1826          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1827          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1828                                                                                                                                                            */
1829     total_counts = n_ISForFaces+n_ISForEdges;
1830     total_counts *= max_constraints;
1831     n_vertices = 0;
1832     if (ISForVertices) {
1833       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1834     }
1835     total_counts += n_vertices;
1836     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1837     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1838     total_counts = 0;
1839     max_size_of_constraint = 0;
1840     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1841       if (i<n_ISForEdges) {
1842         used_IS = &ISForEdges[i];
1843       } else {
1844         used_IS = &ISForFaces[i-n_ISForEdges];
1845       }
1846       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1847       total_counts += j;
1848       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1849     }
1850     total_counts *= max_constraints;
1851     total_counts += n_vertices;
1852     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1853     /* get local part of global near null space vectors */
1854     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1855     for (k=0;k<nnsp_size;k++) {
1856       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1857       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1858       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859     }
1860 
1861     /* whether or not to skip lapack calls */
1862     skip_lapack = PETSC_TRUE;
1863     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1864 
1865     /* allocate some auxiliary stuff */
1866     if (!skip_lapack || pcbddc->use_qr_single) {
1867       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1868     }
1869 
1870     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1871     if (!skip_lapack) {
1872       PetscScalar temp_work;
1873 
1874 #if defined(PETSC_MISSING_LAPACK_GESVD)
1875       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1876       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1877       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1878       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1879 #if defined(PETSC_USE_COMPLEX)
1880       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1881 #endif
1882       /* now we evaluate the optimal workspace using query with lwork=-1 */
1883       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1884       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1885       lwork = -1;
1886       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1887 #if !defined(PETSC_USE_COMPLEX)
1888       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1889 #else
1890       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1891 #endif
1892       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1893       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1894 #else /* on missing GESVD */
1895       /* SVD */
1896       PetscInt max_n,min_n;
1897       max_n = max_size_of_constraint;
1898       min_n = max_constraints;
1899       if (max_size_of_constraint < max_constraints) {
1900         min_n = max_size_of_constraint;
1901         max_n = max_constraints;
1902       }
1903       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1904 #if defined(PETSC_USE_COMPLEX)
1905       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1906 #endif
1907       /* now we evaluate the optimal workspace using query with lwork=-1 */
1908       lwork = -1;
1909       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1910       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1911       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1912       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1913 #if !defined(PETSC_USE_COMPLEX)
1914       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1915 #else
1916       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1917 #endif
1918       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1919       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1920 #endif /* on missing GESVD */
1921       /* Allocate optimal workspace */
1922       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1923       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1924     }
1925     /* Now we can loop on constraining sets */
1926     total_counts = 0;
1927     temp_indices[0] = 0;
1928     /* vertices */
1929     if (ISForVertices) {
1930       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1931       if (nnsp_has_cnst) { /* consider all vertices */
1932         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1933         for (i=0;i<n_vertices;i++) {
1934           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1935           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1936           total_counts++;
1937         }
1938       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1939         PetscBool used_vertex;
1940         for (i=0;i<n_vertices;i++) {
1941           used_vertex = PETSC_FALSE;
1942           k = 0;
1943           while (!used_vertex && k<nnsp_size) {
1944             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1945             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1946               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1947               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1948               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1949               total_counts++;
1950               used_vertex = PETSC_TRUE;
1951             }
1952             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1953             k++;
1954           }
1955         }
1956       }
1957       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1958       n_vertices = total_counts;
1959     }
1960 
1961     /* edges and faces */
1962     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1963       if (ncc<n_ISForEdges) {
1964         used_IS = &ISForEdges[ncc];
1965         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1966       } else {
1967         used_IS = &ISForFaces[ncc-n_ISForEdges];
1968         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1969       }
1970       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1971       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1972       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1973       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1974       /* change of basis should not be performed on local periodic nodes */
1975       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1976       if (nnsp_has_cnst) {
1977         PetscScalar quad_value;
1978         temp_constraints++;
1979         if (!pcbddc->use_nnsp_true) {
1980           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1981         } else {
1982           quad_value = 1.0;
1983         }
1984         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1985         for (j=0;j<size_of_constraint;j++) {
1986           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1987         }
1988         /* sort by global ordering if using lapack subroutines (not needed!) */
1989         if (!skip_lapack || pcbddc->use_qr_single) {
1990           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1991           for (j=0;j<size_of_constraint;j++) {
1992             permutation[j]=j;
1993           }
1994           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1995           for (j=0;j<size_of_constraint;j++) {
1996             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1997           }
1998           for (j=0;j<size_of_constraint;j++) {
1999             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2000             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2001           }
2002           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2003           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2004         }
2005         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2006         total_counts++;
2007       }
2008       for (k=0;k<nnsp_size;k++) {
2009         PetscReal real_value;
2010         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2011         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2012         for (j=0;j<size_of_constraint;j++) {
2013           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2014         }
2015         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2016         /* check if array is null on the connected component */
2017         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2018         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2019         if (real_value > 0.0) { /* keep indices and values */
2020           /* sort by global ordering if using lapack subroutines */
2021           if (!skip_lapack || pcbddc->use_qr_single) {
2022             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2023             for (j=0;j<size_of_constraint;j++) {
2024               permutation[j]=j;
2025             }
2026             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2027             for (j=0;j<size_of_constraint;j++) {
2028               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2029               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2030             }
2031             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2032             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2033           }
2034           temp_constraints++;
2035           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2036           total_counts++;
2037         }
2038       }
2039       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2040       valid_constraints = temp_constraints;
2041       if (!pcbddc->use_nnsp_true && temp_constraints) {
2042         if (temp_constraints == 1) { /* just normalize the constraint */
2043           PetscScalar norm;
2044           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2045           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2046           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2047           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2048         } else { /* perform SVD */
2049           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2050 
2051 #if defined(PETSC_MISSING_LAPACK_GESVD)
2052           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2053              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2054              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2055                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2056                 from that computed using LAPACKgesvd
2057              -> This is due to a different computation of eigenvectors in LAPACKheev
2058              -> The quality of the POD-computed basis will be the same */
2059           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2060           /* Store upper triangular part of correlation matrix */
2061           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2062           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2063           for (j=0;j<temp_constraints;j++) {
2064             for (k=0;k<j+1;k++) {
2065               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2066             }
2067           }
2068           /* compute eigenvalues and eigenvectors of correlation matrix */
2069           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2070           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2071 #if !defined(PETSC_USE_COMPLEX)
2072           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2073 #else
2074           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2075 #endif
2076           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2077           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2078           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2079           j = 0;
2080           while (j < temp_constraints && singular_vals[j] < tol) j++;
2081           total_counts = total_counts-j;
2082           valid_constraints = temp_constraints-j;
2083           /* scale and copy POD basis into used quadrature memory */
2084           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2085           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2086           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2087           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2088           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2089           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2090           if (j<temp_constraints) {
2091             PetscInt ii;
2092             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2093             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2095             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2096             for (k=0;k<temp_constraints-j;k++) {
2097               for (ii=0;ii<size_of_constraint;ii++) {
2098                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2099               }
2100             }
2101           }
2102 #else  /* on missing GESVD */
2103           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2104           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2105           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2106           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2107 #if !defined(PETSC_USE_COMPLEX)
2108           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2109 #else
2110           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2111 #endif
2112           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2113           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2114           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2115           k = temp_constraints;
2116           if (k > size_of_constraint) k = size_of_constraint;
2117           j = 0;
2118           while (j < k && singular_vals[k-j-1] < tol) j++;
2119           valid_constraints = k-j;
2120           total_counts = total_counts-temp_constraints+valid_constraints;
2121 #endif /* on missing GESVD */
2122         }
2123       }
2124       /* setting change_of_basis flag is safe now */
2125       if (boolforchange) {
2126         for (j=0;j<valid_constraints;j++) {
2127           PetscBTSet(change_basis,total_counts-j-1);
2128         }
2129       }
2130     }
2131     /* free workspace */
2132     if (!skip_lapack || pcbddc->use_qr_single) {
2133       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2134     }
2135     if (!skip_lapack) {
2136       ierr = PetscFree(work);CHKERRQ(ierr);
2137 #if defined(PETSC_USE_COMPLEX)
2138       ierr = PetscFree(rwork);CHKERRQ(ierr);
2139 #endif
2140       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2141 #if defined(PETSC_MISSING_LAPACK_GESVD)
2142       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2143       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2144 #endif
2145     }
2146     for (k=0;k<nnsp_size;k++) {
2147       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2148     }
2149     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2150   } else {
2151     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2152     PetscInt        cum = 0;
2153 
2154     total_counts = 0;
2155     n_vertices = 0;
2156     if (sub_schurs->is_Ej_com) {
2157       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2158     }
2159     max_constraints = 0;
2160     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2161       total_counts += pcbddc->adaptive_constraints_n[i];
2162       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2163     }
2164     temp_indices = pcbddc->adaptive_constraints_ptrs;
2165     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2166     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2167 
2168 #if 0
2169     printf("Found %d totals\n",total_counts);
2170     for (i=0;i<total_counts;i++) {
2171       printf("const %d, start %d",i,temp_indices[i]);
2172       printf(" end %d:\n",temp_indices[i+1]);
2173       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2174         printf("  idxs %d",temp_indices_to_constraint[j]);
2175         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2176       }
2177     }
2178     for (i=0;i<n_vertices;i++) {
2179       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2180     }
2181     for (i=0;i<sub_schurs->n_subs;i++) {
2182       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2183     }
2184 #endif
2185 
2186     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2187     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2188     /* Change of basis */
2189     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2190     if (pcbddc->use_change_of_basis) {
2191       cum = n_vertices;
2192       for (i=0;i<sub_schurs->n_subs;i++) {
2193         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2194           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2195             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2196           }
2197         }
2198         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2199       }
2200     }
2201   }
2202 
2203   /* free index sets of faces, edges and vertices */
2204   for (i=0;i<n_ISForFaces;i++) {
2205     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2206   }
2207   if (n_ISForFaces) {
2208     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2209   }
2210   for (i=0;i<n_ISForEdges;i++) {
2211     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2212   }
2213   if (n_ISForEdges) {
2214     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2215   }
2216   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2217 
2218   /* map temp_indices_to_constraint in boundary numbering */
2219   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2220   if (i != temp_indices[total_counts]) {
2221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2222   }
2223 
2224   /* set quantities in pcbddc data structure and store previous primal size */
2225   /* n_vertices defines the number of subdomain corners in the primal space */
2226   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2227   olocal_primal_size = pcbddc->local_primal_size;
2228   pcbddc->local_primal_size = total_counts;
2229   pcbddc->n_vertices = n_vertices;
2230   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2231 
2232   /* Create constraint matrix */
2233   /* The constraint matrix is used to compute the l2g map of primal dofs */
2234   /* so we need to set it up properly either with or without change of basis */
2235   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2236   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2237   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2238   /* array to compute a local numbering of constraints : vertices first then constraints */
2239   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2240   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2241   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2242   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2243   /* auxiliary stuff for basis change */
2244   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2245   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2246 
2247   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2248   total_primal_vertices=0;
2249   for (i=0;i<pcbddc->local_primal_size;i++) {
2250     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2251     if (size_of_constraint == 1) {
2252       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2253       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2254       aux_primal_minloc[total_primal_vertices]=0;
2255       total_primal_vertices++;
2256     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2257       PetscInt min_loc,min_index;
2258       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2259       /* find first untouched local node */
2260       k = 0;
2261       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2262       min_index = global_indices[k];
2263       min_loc = k;
2264       /* search the minimum among global nodes already untouched on the cc */
2265       for (k=1;k<size_of_constraint;k++) {
2266         /* there can be more than one constraint on a single connected component */
2267         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2268           min_index = global_indices[k];
2269           min_loc = k;
2270         }
2271       }
2272       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2273       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2274       aux_primal_minloc[total_primal_vertices]=min_loc;
2275       total_primal_vertices++;
2276     }
2277   }
2278   /* determine if a QR strategy is needed for change of basis */
2279   qr_needed = PETSC_FALSE;
2280   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2281   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2282     if (PetscBTLookup(change_basis,i)) {
2283       if (!pcbddc->use_qr_single) {
2284         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2285         j = 0;
2286         for (k=0;k<size_of_constraint;k++) {
2287           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2288             j++;
2289           }
2290         }
2291         /* found more than one primal dof on the cc */
2292         if (j > 1) {
2293           PetscBTSet(qr_needed_idx,i);
2294           qr_needed = PETSC_TRUE;
2295         }
2296       } else {
2297         PetscBTSet(qr_needed_idx,i);
2298         qr_needed = PETSC_TRUE;
2299       }
2300     }
2301   }
2302   /* free workspace */
2303   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2304 
2305   /* permute indices in order to have a sorted set of vertices */
2306   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2307 
2308   /* nonzero structure of constraint matrix */
2309   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2310   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2311   j=total_primal_vertices;
2312   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2313     if (!PetscBTLookup(change_basis,i)) {
2314       nnz[j]=temp_indices[i+1]-temp_indices[i];
2315       j++;
2316     }
2317   }
2318   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2319   ierr = PetscFree(nnz);CHKERRQ(ierr);
2320   /* set values in constraint matrix */
2321   for (i=0;i<total_primal_vertices;i++) {
2322     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2323   }
2324   total_counts = total_primal_vertices;
2325   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2326     if (!PetscBTLookup(change_basis,i)) {
2327       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2328       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2329       total_counts++;
2330     }
2331   }
2332   /* assembling */
2333   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   /*
2336   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2337   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2338   */
2339   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2340   if (pcbddc->use_change_of_basis) {
2341     /* dual and primal dofs on a single cc */
2342     PetscInt     dual_dofs,primal_dofs;
2343     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2344     PetscInt     primal_counter;
2345     /* working stuff for GEQRF */
2346     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2347     PetscBLASInt lqr_work;
2348     /* working stuff for UNGQR */
2349     PetscScalar  *gqr_work,lgqr_work_t;
2350     PetscBLASInt lgqr_work;
2351     /* working stuff for TRTRS */
2352     PetscScalar  *trs_rhs;
2353     PetscBLASInt Blas_NRHS;
2354     /* pointers for values insertion into change of basis matrix */
2355     PetscInt     *start_rows,*start_cols;
2356     PetscScalar  *start_vals;
2357     /* working stuff for values insertion */
2358     PetscBT      is_primal;
2359     /* matrix sizes */
2360     PetscInt     global_size,local_size;
2361     /* temporary change of basis */
2362     Mat          localChangeOfBasisMatrix;
2363 
2364     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2365     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2366     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2367     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2368 
2369     /* nonzeros for local mat */
2370     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2371     for (i=0;i<pcis->n;i++) nnz[i]=1;
2372     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2373       if (PetscBTLookup(change_basis,i)) {
2374         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2375         if (PetscBTLookup(qr_needed_idx,i)) {
2376           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2377         } else {
2378           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2379           /* get local primal index on the cc */
2380           j = 0;
2381           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2382           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2383         }
2384       }
2385     }
2386     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2387     ierr = PetscFree(nnz);CHKERRQ(ierr);
2388     /* Set initial identity in the matrix */
2389     for (i=0;i<pcis->n;i++) {
2390       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2391     }
2392 
2393     if (pcbddc->dbg_flag) {
2394       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2395       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2396     }
2397 
2398 
2399     /* Now we loop on the constraints which need a change of basis */
2400     /*
2401        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2402        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2403 
2404        Basic blocks of change of basis matrix T computed by
2405 
2406           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2407 
2408             | 1        0   ...        0         s_1/S |
2409             | 0        1   ...        0         s_2/S |
2410             |              ...                        |
2411             | 0        ...            1     s_{n-1}/S |
2412             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2413 
2414             with S = \sum_{i=1}^n s_i^2
2415             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2416                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2417 
2418           - QR decomposition of constraints otherwise
2419     */
2420     if (qr_needed) {
2421       /* space to store Q */
2422       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2423       /* first we issue queries for optimal work */
2424       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2425       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2426       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2427       lqr_work = -1;
2428       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2429       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2430       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2431       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2432       lgqr_work = -1;
2433       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2434       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2435       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2436       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2437       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2438       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2439       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2440       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2441       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2442       /* array to store scaling factors for reflectors */
2443       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2444       /* array to store rhs and solution of triangular solver */
2445       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2446       /* allocating workspace for check */
2447       if (pcbddc->dbg_flag) {
2448         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2449       }
2450     }
2451     /* array to store whether a node is primal or not */
2452     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2453     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2454     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2455     if (i != total_primal_vertices) {
2456       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2457     }
2458     for (i=0;i<total_primal_vertices;i++) {
2459       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2460     }
2461     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2462 
2463     /* loop on constraints and see whether or not they need a change of basis and compute it */
2464     /* -> using implicit ordering contained in temp_indices data */
2465     total_counts = pcbddc->n_vertices;
2466     primal_counter = total_counts;
2467     while (total_counts<pcbddc->local_primal_size) {
2468       primal_dofs = 1;
2469       if (PetscBTLookup(change_basis,total_counts)) {
2470         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2471         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2472           primal_dofs++;
2473         }
2474         /* get constraint info */
2475         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2476         dual_dofs = size_of_constraint-primal_dofs;
2477 
2478         if (pcbddc->dbg_flag) {
2479           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2480         }
2481 
2482         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2483 
2484           /* copy quadrature constraints for change of basis check */
2485           if (pcbddc->dbg_flag) {
2486             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2487           }
2488           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2489           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2490 
2491           /* compute QR decomposition of constraints */
2492           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2493           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2494           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2495           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2496           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2497           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2498           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2499 
2500           /* explictly compute R^-T */
2501           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2502           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2503           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2504           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2505           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2507           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2508           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2509           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2510           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2511 
2512           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2516           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2517           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2518           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2519           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2520           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2521 
2522           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2523              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2524              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2525           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2528           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2529           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2530           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2531           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2532           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2533           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2534           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2535 
2536           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2537           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2538           /* insert cols for primal dofs */
2539           for (j=0;j<primal_dofs;j++) {
2540             start_vals = &qr_basis[j*size_of_constraint];
2541             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2542             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2543           }
2544           /* insert cols for dual dofs */
2545           for (j=0,k=0;j<dual_dofs;k++) {
2546             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2547               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2548               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2549               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2550               j++;
2551             }
2552           }
2553 
2554           /* check change of basis */
2555           if (pcbddc->dbg_flag) {
2556             PetscInt   ii,jj;
2557             PetscBool valid_qr=PETSC_TRUE;
2558             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2559             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2560             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2561             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2562             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2563             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2564             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2565             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2566             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2567             for (jj=0;jj<size_of_constraint;jj++) {
2568               for (ii=0;ii<primal_dofs;ii++) {
2569                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2570                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2571               }
2572             }
2573             if (!valid_qr) {
2574               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2575               for (jj=0;jj<size_of_constraint;jj++) {
2576                 for (ii=0;ii<primal_dofs;ii++) {
2577                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2578                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2579                   }
2580                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2581                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2582                   }
2583                 }
2584               }
2585             } else {
2586               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2587             }
2588           }
2589         } else { /* simple transformation block */
2590           PetscInt    row,col;
2591           PetscScalar val,norm;
2592 
2593           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2594           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2595           for (j=0;j<size_of_constraint;j++) {
2596             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2597             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2598             if (!PetscBTLookup(is_primal,row_B)) {
2599               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2600               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2601               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2602             } else {
2603               for (k=0;k<size_of_constraint;k++) {
2604                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2605                 if (row != col) {
2606                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2607                 } else {
2608                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2609                 }
2610                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2611               }
2612             }
2613           }
2614           if (pcbddc->dbg_flag) {
2615             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2616           }
2617         }
2618         /* increment primal counter */
2619         primal_counter += primal_dofs;
2620       } else {
2621         if (pcbddc->dbg_flag) {
2622           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2623         }
2624       }
2625       /* increment constraint counter total_counts */
2626       total_counts += primal_dofs;
2627     }
2628 
2629     /* free workspace */
2630     if (qr_needed) {
2631       if (pcbddc->dbg_flag) {
2632         ierr = PetscFree(work);CHKERRQ(ierr);
2633       }
2634       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2635       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2636       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2637       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2638       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2639     }
2640     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2641     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2642     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2643 
2644     /* assembling of global change of variable */
2645     {
2646       Mat tmat;
2647       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2648       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2649       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2650       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2651       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2652       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2653       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2654       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2655       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2656       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2657       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2658       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2659       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2660       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2661       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2662       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2663     }
2664     /* check */
2665     if (pcbddc->dbg_flag) {
2666       PetscReal error;
2667       Vec       x,x_change;
2668 
2669       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2670       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2671       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2672       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2673       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2674       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2675       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2676       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2677       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2678       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2679       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2680       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2681       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2682       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2683       ierr = VecDestroy(&x);CHKERRQ(ierr);
2684       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2685     }
2686 
2687     /* adapt sub_schurs computed (if any) */
2688     if (pcbddc->use_deluxe_scaling) {
2689       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2690       if (sub_schurs->n_subs_par_g) {
2691         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2692       }
2693       if (sub_schurs->S_Ej_all) {
2694         Mat S_1,S_2,tmat;
2695         IS is_all_N;
2696 
2697         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2698         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2699         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2700         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2701         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2702         sub_schurs->S_Ej_all = S_1;
2703         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2704         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2705         sub_schurs->sum_S_Ej_all = S_2;
2706         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2707       }
2708     }
2709     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2710   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2711     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2712     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2713   }
2714 
2715   /* set up change of basis context */
2716   if (pcbddc->ChangeOfBasisMatrix) {
2717     PCBDDCChange_ctx change_ctx;
2718 
2719     if (!pcbddc->new_global_mat) {
2720       PetscInt global_size,local_size;
2721 
2722       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2723       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2724       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2725       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2726       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2727       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2728       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2729       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2730       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2731     } else {
2732       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2733       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2734       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2735     }
2736     if (!pcbddc->user_ChangeOfBasisMatrix) {
2737       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2738       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2739     } else {
2740       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2741       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2742     }
2743     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2744     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2745     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747   }
2748 
2749   /* get indices in local ordering for vertices and constraints */
2750   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2751     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2752     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2753   }
2754   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2755   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2756   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2757   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2758   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2759   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2760   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2761   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2762   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2763   /* set quantities in PCBDDC data struct */
2764   pcbddc->n_actual_vertices = i;
2765   /* check if a new primal space has been introduced */
2766   pcbddc->new_primal_space_local = PETSC_TRUE;
2767   if (olocal_primal_size == pcbddc->local_primal_size) {
2768     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2769     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2770     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2771   }
2772   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2773   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2774 
2775   /* flush dbg viewer */
2776   if (pcbddc->dbg_flag) {
2777     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2778   }
2779 
2780   /* free workspace */
2781   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2782   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2783   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2784   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2785   if (!pcbddc->adaptive_selection) {
2786     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2787     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2788   } else {
2789     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2790                       pcbddc->adaptive_constraints_ptrs,
2791                       pcbddc->adaptive_constraints_idxs,
2792                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2793     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2794   }
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 #undef __FUNCT__
2799 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2800 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2801 {
2802   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2803   PC_IS       *pcis = (PC_IS*)pc->data;
2804   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2805   PetscInt    ierr,i,vertex_size;
2806   PetscViewer viewer=pcbddc->dbg_viewer;
2807 
2808   PetscFunctionBegin;
2809   /* Reset previously computed graph */
2810   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2811   /* Init local Graph struct */
2812   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2813 
2814   /* Check validity of the csr graph passed in by the user */
2815   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2816     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2817   }
2818 
2819   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2820   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2821     Mat       mat_adj;
2822     PetscInt  *xadj,*adjncy;
2823     PetscInt  nvtxs;
2824     PetscBool flg_row=PETSC_TRUE;
2825 
2826     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2827     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2828     if (!flg_row) {
2829       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2830     }
2831     if (pcbddc->use_local_adj) {
2832       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2833       pcbddc->computed_rowadj = PETSC_TRUE;
2834     } else { /* just compute subdomain's connected components */
2835       IS                     is_dummy;
2836       ISLocalToGlobalMapping l2gmap_dummy;
2837       PetscInt               j,sum;
2838       PetscInt               *cxadj,*cadjncy;
2839       const PetscInt         *idxs;
2840       PCBDDCGraph            graph;
2841       PetscBT                is_on_boundary;
2842 
2843       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2844       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2845       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2846       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2847       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2848       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2849       graph->xadj = xadj;
2850       graph->adjncy = adjncy;
2851       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2852       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2853 
2854       if (pcbddc->dbg_flag) {
2855         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2856         for (i=0;i<graph->ncc;i++) {
2857           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2858         }
2859         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2860       }
2861 
2862       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2863       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2864       for (i=0;i<pcis->n_B;i++) {
2865         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2866       }
2867       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2868 
2869       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2870       sum = 0;
2871       for (i=0;i<graph->ncc;i++) {
2872         PetscInt sizecc = 0;
2873         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2874           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2875             sizecc++;
2876           }
2877         }
2878         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2879           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2880             cxadj[graph->queue[j]] = sizecc;
2881           }
2882         }
2883         sum += sizecc*sizecc;
2884       }
2885       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2886       sum = 0;
2887       for (i=0;i<nvtxs;i++) {
2888         PetscInt temp = cxadj[i];
2889         cxadj[i] = sum;
2890         sum += temp;
2891       }
2892       cxadj[nvtxs] = sum;
2893       for (i=0;i<graph->ncc;i++) {
2894         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2895           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2896             PetscInt k,sizecc = 0;
2897             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2898               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2899                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2900                 sizecc++;
2901               }
2902             }
2903           }
2904         }
2905       }
2906       if (nvtxs) {
2907         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2908       } else {
2909         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2910         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2911       }
2912       graph->xadj = 0;
2913       graph->adjncy = 0;
2914       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2915       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2916     }
2917     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2918     if (!flg_row) {
2919       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2920     }
2921     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2922   }
2923 
2924   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2925   vertex_size = 1;
2926   if (pcbddc->user_provided_isfordofs) {
2927     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2928       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2929       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2930         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2931         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2932       }
2933       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2934       pcbddc->n_ISForDofs = 0;
2935       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2936     }
2937     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2938     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2939   } else {
2940     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2941       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2942       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2943       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2944         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2945       }
2946     }
2947   }
2948 
2949   /* Setup of Graph */
2950   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2951     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2952   }
2953   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2954     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2955   }
2956   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2957 
2958   /* Graph's connected components analysis */
2959   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2960 
2961   /* print some info to stdout */
2962   if (pcbddc->dbg_flag) {
2963     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2964   }
2965 
2966   /* mark topography has done */
2967   pcbddc->recompute_topography = PETSC_FALSE;
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 #undef __FUNCT__
2972 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2973 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2974 {
2975   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2976   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   n = 0;
2981   vertices = 0;
2982   if (pcbddc->ConstraintMatrix) {
2983     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2984     for (i=0;i<local_primal_size;i++) {
2985       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2986       if (size_of_constraint == 1) n++;
2987       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2988     }
2989     if (vertices_idx) {
2990       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2991       n = 0;
2992       for (i=0;i<local_primal_size;i++) {
2993         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2994         if (size_of_constraint == 1) {
2995           vertices[n++]=row_cmat_indices[0];
2996         }
2997         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2998       }
2999     }
3000   }
3001   *n_vertices = n;
3002   if (vertices_idx) *vertices_idx = vertices;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 #undef __FUNCT__
3007 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3008 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3009 {
3010   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3011   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3012   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3013   PetscBT        touched;
3014   PetscErrorCode ierr;
3015 
3016     /* This function assumes that the number of local constraints per connected component
3017        is not greater than the number of nodes defined for the connected component
3018        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3019   PetscFunctionBegin;
3020   n = 0;
3021   constraints_index = 0;
3022   if (pcbddc->ConstraintMatrix) {
3023     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3024     max_size_of_constraint = 0;
3025     for (i=0;i<local_primal_size;i++) {
3026       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3027       if (size_of_constraint > 1) {
3028         n++;
3029       }
3030       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3031       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3032     }
3033     if (constraints_idx) {
3034       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3035       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3036       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3037       n = 0;
3038       for (i=0;i<local_primal_size;i++) {
3039         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3040         if (size_of_constraint > 1) {
3041           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3042           /* find first untouched local node */
3043           j = 0;
3044           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3045           min_index = row_cmat_global_indices[j];
3046           min_loc = j;
3047           /* search the minimum among nodes not yet touched on the connected component
3048              since there can be more than one constraint on a single cc */
3049           for (j=1;j<size_of_constraint;j++) {
3050             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3051               min_index = row_cmat_global_indices[j];
3052               min_loc = j;
3053             }
3054           }
3055           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3056           constraints_index[n++] = row_cmat_indices[min_loc];
3057         }
3058         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3059       }
3060       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3061       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3062     }
3063   }
3064   *n_constraints = n;
3065   if (constraints_idx) *constraints_idx = constraints_index;
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "PCBDDCSubsetNumbering"
3071 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3072 {
3073   Vec            local_vec,global_vec;
3074   IS             seqis,paris;
3075   VecScatter     scatter_ctx;
3076   PetscScalar    *array;
3077   PetscInt       *temp_global_dofs;
3078   PetscScalar    globalsum;
3079   PetscInt       i,j,s;
3080   PetscInt       nlocals,first_index,old_index,max_local;
3081   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3082   PetscMPIInt    *dof_sizes,*dof_displs;
3083   PetscBool      first_found;
3084   PetscErrorCode ierr;
3085 
3086   PetscFunctionBegin;
3087   /* mpi buffers */
3088   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3089   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3090   j = ( !rank_prec_comm ? size_prec_comm : 0);
3091   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3092   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3093   /* get maximum size of subset */
3094   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3095   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3096   max_local = 0;
3097   for (i=0;i<n_local_dofs;i++) {
3098     if (max_local < temp_global_dofs[i] ) {
3099       max_local = temp_global_dofs[i];
3100     }
3101   }
3102   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3103   max_global++;
3104   max_local = 0;
3105   for (i=0;i<n_local_dofs;i++) {
3106     if (max_local < local_dofs[i] ) {
3107       max_local = local_dofs[i];
3108     }
3109   }
3110   max_local++;
3111   /* allocate workspace */
3112   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3113   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3114   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3115   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3116   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3117   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3118   /* create scatter */
3119   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3120   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3121   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3122   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3123   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3124   /* init array */
3125   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3126   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3127   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3128   if (local_dofs_mult) {
3129     for (i=0;i<n_local_dofs;i++) {
3130       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3131     }
3132   } else {
3133     for (i=0;i<n_local_dofs;i++) {
3134       array[local_dofs[i]]=1.0;
3135     }
3136   }
3137   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3138   /* scatter into global vec and get total number of global dofs */
3139   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3140   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3141   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3142   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3143   /* Fill global_vec with cumulative function for global numbering */
3144   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3145   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3146   nlocals = 0;
3147   first_index = -1;
3148   first_found = PETSC_FALSE;
3149   for (i=0;i<s;i++) {
3150     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3151       first_found = PETSC_TRUE;
3152       first_index = i;
3153     }
3154     nlocals += (PetscInt)PetscRealPart(array[i]);
3155   }
3156   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3157   if (!rank_prec_comm) {
3158     dof_displs[0]=0;
3159     for (i=1;i<size_prec_comm;i++) {
3160       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3161     }
3162   }
3163   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3164   if (first_found) {
3165     array[first_index] += (PetscScalar)nlocals;
3166     old_index = first_index;
3167     for (i=first_index+1;i<s;i++) {
3168       if (PetscRealPart(array[i]) > 0.1) {
3169         array[i] += array[old_index];
3170         old_index = i;
3171       }
3172     }
3173   }
3174   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3175   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3176   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3177   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3178   /* get global ordering of local dofs */
3179   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3180   if (local_dofs_mult) {
3181     for (i=0;i<n_local_dofs;i++) {
3182       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3183     }
3184   } else {
3185     for (i=0;i<n_local_dofs;i++) {
3186       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3187     }
3188   }
3189   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3190   /* free workspace */
3191   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3192   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3193   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3194   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3195   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3196   /* return pointer to global ordering of local dofs */
3197   *global_numbering_subset = temp_global_dofs;
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3203 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3204 {
3205   PetscInt       i,j;
3206   PetscScalar    *alphas;
3207   PetscErrorCode ierr;
3208 
3209   PetscFunctionBegin;
3210   /* this implements stabilized Gram-Schmidt */
3211   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3212   for (i=0;i<n;i++) {
3213     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3214     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3215     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3216   }
3217   ierr = PetscFree(alphas);CHKERRQ(ierr);
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatISGetSubassemblingPattern"
3223 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3224 {
3225   Mat             subdomain_adj;
3226   IS              new_ranks,ranks_send_to;
3227   MatPartitioning partitioner;
3228   Mat_IS          *matis;
3229   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3230   PetscInt        prank;
3231   PetscMPIInt     size,rank,color;
3232   PetscInt        *xadj,*adjncy,*oldranks;
3233   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3234   PetscInt        i,local_size,threshold=0;
3235   PetscErrorCode  ierr;
3236   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3237   PetscSubcomm    subcomm;
3238 
3239   PetscFunctionBegin;
3240   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3241   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3242   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3243 
3244   /* Get info on mapping */
3245   matis = (Mat_IS*)(mat->data);
3246   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3247   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3248 
3249   /* build local CSR graph of subdomains' connectivity */
3250   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3251   xadj[0] = 0;
3252   xadj[1] = PetscMax(n_neighs-1,0);
3253   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3254   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3255 
3256   if (threshold) {
3257     PetscInt xadj_count = 0;
3258     for (i=1;i<n_neighs;i++) {
3259       if (n_shared[i] > threshold) {
3260         adjncy[xadj_count] = neighs[i];
3261         adjncy_wgt[xadj_count] = n_shared[i];
3262         xadj_count++;
3263       }
3264     }
3265     xadj[1] = xadj_count;
3266   } else {
3267     if (xadj[1]) {
3268       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3269       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3270     }
3271   }
3272   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3273   if (use_square) {
3274     for (i=0;i<xadj[1];i++) {
3275       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3276     }
3277   }
3278   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3279 
3280   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3281 
3282   /*
3283     Restrict work on active processes only.
3284   */
3285   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3286   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3287   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3288   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3289   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3290   if (color) {
3291     ierr = PetscFree(xadj);CHKERRQ(ierr);
3292     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3293     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3294   } else {
3295     PetscInt coarsening_ratio;
3296     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3297     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3298     prank = rank;
3299     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3300     /*
3301     for (i=0;i<size;i++) {
3302       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3303     }
3304     */
3305     for (i=0;i<xadj[1];i++) {
3306       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3307     }
3308     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3309     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3310     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3311 
3312     /* Partition */
3313     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3314     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3315     if (use_vwgt) {
3316       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3317       v_wgt[0] = local_size;
3318       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3319     }
3320     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3321     coarsening_ratio = size/n_subdomains;
3322     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3323     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3324     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3325     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3326 
3327     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3328     if (contiguous) {
3329       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3330     } else {
3331       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3332     }
3333     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3334     /* clean up */
3335     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3336     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3337     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3338     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3339   }
3340   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3341 
3342   /* assemble parallel IS for sends */
3343   i = 1;
3344   if (color) i=0;
3345   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3346 
3347   /* get back IS */
3348   *is_sends = ranks_send_to;
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatISSubassemble"
3356 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3357 {
3358   Mat                    local_mat;
3359   Mat_IS                 *matis;
3360   IS                     is_sends_internal;
3361   PetscInt               rows,cols,new_local_rows;
3362   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3363   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3364   ISLocalToGlobalMapping l2gmap;
3365   PetscInt*              l2gmap_indices;
3366   const PetscInt*        is_indices;
3367   MatType                new_local_type;
3368   /* buffers */
3369   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3370   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3371   PetscInt               *recv_buffer_idxs_local;
3372   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3373   /* MPI */
3374   MPI_Comm               comm,comm_n;
3375   PetscSubcomm           subcomm;
3376   PetscMPIInt            n_sends,n_recvs,commsize;
3377   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3378   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3379   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3380   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3381   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3382   PetscErrorCode         ierr;
3383 
3384   PetscFunctionBegin;
3385   /* TODO: add missing checks */
3386   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3387   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3388   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3389   PetscValidLogicalCollectiveInt(mat,nis,7);
3390   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3391   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3392   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3393   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3394   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3395   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3396   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3397   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3398     PetscInt mrows,mcols,mnrows,mncols;
3399     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3400     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3401     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3402     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3403     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3404     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3405   }
3406   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3407   PetscValidLogicalCollectiveInt(mat,bs,0);
3408   /* prepare IS for sending if not provided */
3409   if (!is_sends) {
3410     PetscBool pcontig = PETSC_TRUE;
3411     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3412     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3413   } else {
3414     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3415     is_sends_internal = is_sends;
3416   }
3417 
3418   /* get pointer of MATIS data */
3419   matis = (Mat_IS*)mat->data;
3420 
3421   /* get comm */
3422   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3423 
3424   /* compute number of sends */
3425   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3426   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3427 
3428   /* compute number of receives */
3429   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3430   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3431   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3432   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3433   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3434   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3435   ierr = PetscFree(iflags);CHKERRQ(ierr);
3436 
3437   /* restrict comm if requested */
3438   subcomm = 0;
3439   destroy_mat = PETSC_FALSE;
3440   if (restrict_comm) {
3441     PetscMPIInt color,subcommsize;
3442 
3443     color = 0;
3444     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3445     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3446     subcommsize = commsize - subcommsize;
3447     /* check if reuse has been requested */
3448     if (reuse == MAT_REUSE_MATRIX) {
3449       if (*mat_n) {
3450         PetscMPIInt subcommsize2;
3451         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3452         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3453         comm_n = PetscObjectComm((PetscObject)*mat_n);
3454       } else {
3455         comm_n = PETSC_COMM_SELF;
3456       }
3457     } else { /* MAT_INITIAL_MATRIX */
3458       PetscMPIInt rank;
3459 
3460       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3461       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3462       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3463       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3464       comm_n = subcomm->comm;
3465     }
3466     /* flag to destroy *mat_n if not significative */
3467     if (color) destroy_mat = PETSC_TRUE;
3468   } else {
3469     comm_n = comm;
3470   }
3471 
3472   /* prepare send/receive buffers */
3473   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3474   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3475   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3476   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3477   if (nis) {
3478     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3479   }
3480 
3481   /* Get data from local matrices */
3482   if (!isdense) {
3483     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3484     /* TODO: See below some guidelines on how to prepare the local buffers */
3485     /*
3486        send_buffer_vals should contain the raw values of the local matrix
3487        send_buffer_idxs should contain:
3488        - MatType_PRIVATE type
3489        - PetscInt        size_of_l2gmap
3490        - PetscInt        global_row_indices[size_of_l2gmap]
3491        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3492     */
3493   } else {
3494     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3495     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3496     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3497     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3498     send_buffer_idxs[1] = i;
3499     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3500     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3501     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3502     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3503     for (i=0;i<n_sends;i++) {
3504       ilengths_vals[is_indices[i]] = len*len;
3505       ilengths_idxs[is_indices[i]] = len+2;
3506     }
3507   }
3508   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3509   /* additional is (if any) */
3510   if (nis) {
3511     PetscMPIInt psum;
3512     PetscInt j;
3513     for (j=0,psum=0;j<nis;j++) {
3514       PetscInt plen;
3515       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3516       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3517       psum += len+1; /* indices + lenght */
3518     }
3519     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3520     for (j=0,psum=0;j<nis;j++) {
3521       PetscInt plen;
3522       const PetscInt *is_array_idxs;
3523       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3524       send_buffer_idxs_is[psum] = plen;
3525       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3526       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3527       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3528       psum += plen+1; /* indices + lenght */
3529     }
3530     for (i=0;i<n_sends;i++) {
3531       ilengths_idxs_is[is_indices[i]] = psum;
3532     }
3533     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3534   }
3535 
3536   buf_size_idxs = 0;
3537   buf_size_vals = 0;
3538   buf_size_idxs_is = 0;
3539   for (i=0;i<n_recvs;i++) {
3540     buf_size_idxs += (PetscInt)olengths_idxs[i];
3541     buf_size_vals += (PetscInt)olengths_vals[i];
3542     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3543   }
3544   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3545   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3546   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3547 
3548   /* get new tags for clean communications */
3549   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3550   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3551   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3552 
3553   /* allocate for requests */
3554   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3555   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3556   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3557   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3558   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3559   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3560 
3561   /* communications */
3562   ptr_idxs = recv_buffer_idxs;
3563   ptr_vals = recv_buffer_vals;
3564   ptr_idxs_is = recv_buffer_idxs_is;
3565   for (i=0;i<n_recvs;i++) {
3566     source_dest = onodes[i];
3567     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3568     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3569     ptr_idxs += olengths_idxs[i];
3570     ptr_vals += olengths_vals[i];
3571     if (nis) {
3572       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3573       ptr_idxs_is += olengths_idxs_is[i];
3574     }
3575   }
3576   for (i=0;i<n_sends;i++) {
3577     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3578     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3579     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3580     if (nis) {
3581       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3582     }
3583   }
3584   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3585   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3586 
3587   /* assemble new l2g map */
3588   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3589   ptr_idxs = recv_buffer_idxs;
3590   new_local_rows = 0;
3591   for (i=0;i<n_recvs;i++) {
3592     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3593     ptr_idxs += olengths_idxs[i];
3594   }
3595   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3596   ptr_idxs = recv_buffer_idxs;
3597   new_local_rows = 0;
3598   for (i=0;i<n_recvs;i++) {
3599     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3600     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3601     ptr_idxs += olengths_idxs[i];
3602   }
3603   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3604   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3605   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3606 
3607   /* infer new local matrix type from received local matrices type */
3608   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3609   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3610   if (n_recvs) {
3611     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3612     ptr_idxs = recv_buffer_idxs;
3613     for (i=0;i<n_recvs;i++) {
3614       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3615         new_local_type_private = MATAIJ_PRIVATE;
3616         break;
3617       }
3618       ptr_idxs += olengths_idxs[i];
3619     }
3620     switch (new_local_type_private) {
3621       case MATDENSE_PRIVATE:
3622         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3623           new_local_type = MATSEQAIJ;
3624           bs = 1;
3625         } else { /* if I receive only 1 dense matrix */
3626           new_local_type = MATSEQDENSE;
3627           bs = 1;
3628         }
3629         break;
3630       case MATAIJ_PRIVATE:
3631         new_local_type = MATSEQAIJ;
3632         bs = 1;
3633         break;
3634       case MATBAIJ_PRIVATE:
3635         new_local_type = MATSEQBAIJ;
3636         break;
3637       case MATSBAIJ_PRIVATE:
3638         new_local_type = MATSEQSBAIJ;
3639         break;
3640       default:
3641         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3642         break;
3643     }
3644   } else { /* by default, new_local_type is seqdense */
3645     new_local_type = MATSEQDENSE;
3646     bs = 1;
3647   }
3648 
3649   /* create MATIS object if needed */
3650   if (reuse == MAT_INITIAL_MATRIX) {
3651     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3652     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3653   } else {
3654     /* it also destroys the local matrices */
3655     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3656   }
3657   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3658   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3659 
3660   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3661 
3662   /* Global to local map of received indices */
3663   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3664   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3665   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3666 
3667   /* restore attributes -> type of incoming data and its size */
3668   buf_size_idxs = 0;
3669   for (i=0;i<n_recvs;i++) {
3670     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3671     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3672     buf_size_idxs += (PetscInt)olengths_idxs[i];
3673   }
3674   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3675 
3676   /* set preallocation */
3677   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3678   if (!newisdense) {
3679     PetscInt *new_local_nnz=0;
3680 
3681     ptr_vals = recv_buffer_vals;
3682     ptr_idxs = recv_buffer_idxs_local;
3683     if (n_recvs) {
3684       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3685     }
3686     for (i=0;i<n_recvs;i++) {
3687       PetscInt j;
3688       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3689         for (j=0;j<*(ptr_idxs+1);j++) {
3690           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3691         }
3692       } else {
3693         /* TODO */
3694       }
3695       ptr_idxs += olengths_idxs[i];
3696     }
3697     if (new_local_nnz) {
3698       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3699       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3700       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3701       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3702       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3703       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3704     } else {
3705       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3706     }
3707     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3708   } else {
3709     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3710   }
3711 
3712   /* set values */
3713   ptr_vals = recv_buffer_vals;
3714   ptr_idxs = recv_buffer_idxs_local;
3715   for (i=0;i<n_recvs;i++) {
3716     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3717       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3718       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3719       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3720       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3721       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3722     } else {
3723       /* TODO */
3724     }
3725     ptr_idxs += olengths_idxs[i];
3726     ptr_vals += olengths_vals[i];
3727   }
3728   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3729   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3730   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3731   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3732   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3733   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3734 
3735 #if 0
3736   if (!restrict_comm) { /* check */
3737     Vec       lvec,rvec;
3738     PetscReal infty_error;
3739 
3740     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3741     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3742     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3743     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3744     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3745     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3746     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3747     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3748     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3749   }
3750 #endif
3751 
3752   /* assemble new additional is (if any) */
3753   if (nis) {
3754     PetscInt **temp_idxs,*count_is,j,psum;
3755 
3756     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3757     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3758     ptr_idxs = recv_buffer_idxs_is;
3759     psum = 0;
3760     for (i=0;i<n_recvs;i++) {
3761       for (j=0;j<nis;j++) {
3762         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3763         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3764         psum += plen;
3765         ptr_idxs += plen+1; /* shift pointer to received data */
3766       }
3767     }
3768     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3769     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3770     for (i=1;i<nis;i++) {
3771       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3772     }
3773     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3774     ptr_idxs = recv_buffer_idxs_is;
3775     for (i=0;i<n_recvs;i++) {
3776       for (j=0;j<nis;j++) {
3777         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3778         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3779         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3780         ptr_idxs += plen+1; /* shift pointer to received data */
3781       }
3782     }
3783     for (i=0;i<nis;i++) {
3784       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3785       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3786       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3787     }
3788     ierr = PetscFree(count_is);CHKERRQ(ierr);
3789     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3790     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3791   }
3792   /* free workspace */
3793   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3794   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3795   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3796   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3797   if (isdense) {
3798     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3799     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3800   } else {
3801     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3802   }
3803   if (nis) {
3804     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3805     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3806   }
3807   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3808   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3809   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3810   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3811   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3812   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3813   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3814   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3815   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3816   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3817   ierr = PetscFree(onodes);CHKERRQ(ierr);
3818   if (nis) {
3819     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3820     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3821     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3822   }
3823   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3824   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3825     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3826     for (i=0;i<nis;i++) {
3827       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3828     }
3829   }
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 /* temporary hack into ksp private data structure */
3834 #include <petsc-private/kspimpl.h>
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3838 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3839 {
3840   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3841   PC_IS                  *pcis = (PC_IS*)pc->data;
3842   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3843   MatNullSpace           CoarseNullSpace=NULL;
3844   ISLocalToGlobalMapping coarse_islg;
3845   IS                     coarse_is,*isarray;
3846   PetscInt               i,im_active=-1,active_procs=-1;
3847   PetscInt               nis,nisdofs,nisneu;
3848   PC                     pc_temp;
3849   PCType                 coarse_pc_type;
3850   KSPType                coarse_ksp_type;
3851   PetscBool              multilevel_requested,multilevel_allowed;
3852   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3853   Mat                    t_coarse_mat_is;
3854   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3855   PetscMPIInt            all_procs;
3856   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3857   PetscBool              compute_vecs = PETSC_FALSE;
3858   PetscScalar            *array;
3859   PetscErrorCode         ierr;
3860 
3861   PetscFunctionBegin;
3862   /* Assign global numbering to coarse dofs */
3863   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3864     compute_vecs = PETSC_TRUE;
3865     PetscInt ocoarse_size;
3866     ocoarse_size = pcbddc->coarse_size;
3867     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3868     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3869     /* see if we can avoid some work */
3870     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3871       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3872         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3873         coarse_reuse = PETSC_FALSE;
3874       } else { /* we can safely reuse already computed coarse matrix */
3875         coarse_reuse = PETSC_TRUE;
3876       }
3877     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3878       coarse_reuse = PETSC_FALSE;
3879     }
3880     /* reset any subassembling information */
3881     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3882     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3883   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3884     coarse_reuse = PETSC_TRUE;
3885   }
3886 
3887   /* count "active" (i.e. with positive local size) and "void" processes */
3888   im_active = !!(pcis->n);
3889   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3890   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3891   void_procs = all_procs-active_procs;
3892   csin_type_simple = PETSC_TRUE;
3893   redist = PETSC_FALSE;
3894   if (pcbddc->current_level && void_procs) {
3895     csin_ml = PETSC_TRUE;
3896     ncoarse_ml = void_procs;
3897     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3898     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3899       csin_ds = PETSC_TRUE;
3900       ncoarse_ds = pcbddc->redistribute_coarse;
3901       redist = PETSC_TRUE;
3902     } else {
3903       csin_ds = PETSC_TRUE;
3904       ncoarse_ds = active_procs;
3905       redist = PETSC_TRUE;
3906     }
3907   } else {
3908     csin_ml = PETSC_FALSE;
3909     ncoarse_ml = all_procs;
3910     if (void_procs) {
3911       csin_ds = PETSC_TRUE;
3912       ncoarse_ds = void_procs;
3913       csin_type_simple = PETSC_FALSE;
3914     } else {
3915       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3916         csin_ds = PETSC_TRUE;
3917         ncoarse_ds = pcbddc->redistribute_coarse;
3918         redist = PETSC_TRUE;
3919       } else {
3920         csin_ds = PETSC_FALSE;
3921         ncoarse_ds = all_procs;
3922       }
3923     }
3924   }
3925 
3926   /*
3927     test if we can go multilevel: three conditions must be satisfied:
3928     - we have not exceeded the number of levels requested
3929     - we can actually subassemble the active processes
3930     - we can find a suitable number of MPI processes where we can place the subassembled problem
3931   */
3932   multilevel_allowed = PETSC_FALSE;
3933   multilevel_requested = PETSC_FALSE;
3934   if (pcbddc->current_level < pcbddc->max_levels) {
3935     multilevel_requested = PETSC_TRUE;
3936     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3937       multilevel_allowed = PETSC_FALSE;
3938     } else {
3939       multilevel_allowed = PETSC_TRUE;
3940     }
3941   }
3942   /* determine number of process partecipating to coarse solver */
3943   if (multilevel_allowed) {
3944     ncoarse = ncoarse_ml;
3945     csin = csin_ml;
3946   } else {
3947     ncoarse = ncoarse_ds;
3948     csin = csin_ds;
3949   }
3950 
3951   /* creates temporary l2gmap and IS for coarse indexes */
3952   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3953   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3954 
3955   /* creates temporary MATIS object for coarse matrix */
3956   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3957   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3958   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3959   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3960 #if 0
3961   {
3962     PetscViewer viewer;
3963     char filename[256];
3964     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3965     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3966     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3967     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3968     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3969   }
3970 #endif
3971   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3972   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3973   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3974   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3975   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3976 
3977   /* compute dofs splitting and neumann boundaries for coarse dofs */
3978   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3979     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3980     const PetscInt         *idxs;
3981     ISLocalToGlobalMapping tmap;
3982 
3983     /* create map between primal indices (in local representative ordering) and local primal numbering */
3984     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3985     /* allocate space for temporary storage */
3986     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3987     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3988     /* allocate for IS array */
3989     nisdofs = pcbddc->n_ISForDofsLocal;
3990     nisneu = !!pcbddc->NeumannBoundariesLocal;
3991     nis = nisdofs + nisneu;
3992     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3993     /* dofs splitting */
3994     for (i=0;i<nisdofs;i++) {
3995       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3996       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3997       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3998       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3999       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4000       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4001       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4002       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4003     }
4004     /* neumann boundaries */
4005     if (pcbddc->NeumannBoundariesLocal) {
4006       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4007       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4008       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4009       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4010       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4011       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4012       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4013       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4014     }
4015     /* free memory */
4016     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4017     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4018     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4019   } else {
4020     nis = 0;
4021     nisdofs = 0;
4022     nisneu = 0;
4023     isarray = NULL;
4024   }
4025   /* destroy no longer needed map */
4026   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4027 
4028   /* restrict on coarse candidates (if needed) */
4029   coarse_mat_is = NULL;
4030   if (csin) {
4031     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4032       if (redist) {
4033         PetscMPIInt rank;
4034         PetscInt    spc,n_spc_p1,dest[1],destsize;
4035 
4036         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4037         spc = active_procs/pcbddc->redistribute_coarse;
4038         n_spc_p1 = active_procs%pcbddc->redistribute_coarse;
4039         if (im_active) {
4040           destsize = 1;
4041           if (rank > n_spc_p1*(spc+1)-1) {
4042             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4043           } else {
4044             dest[0] = rank/(spc+1);
4045           }
4046         } else {
4047           destsize = 0;
4048         }
4049         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4050       } else if (csin_type_simple) {
4051         PetscMPIInt rank;
4052         PetscInt    issize,isidx;
4053 
4054         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4055         if (im_active) {
4056           issize = 1;
4057           isidx = (PetscInt)rank;
4058         } else {
4059           issize = 0;
4060           isidx = -1;
4061         }
4062         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4063       } else { /* get a suitable subassembling pattern from MATIS code */
4064         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4065       }
4066 
4067       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4068       if (!redist || ncoarse <= void_procs) {
4069         PetscInt ncoarse_cand,tissize,*nisindices;
4070         PetscInt *coarse_candidates;
4071         const PetscInt* tisindices;
4072 
4073         /* get coarse candidates' ranks in pc communicator */
4074         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4075         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4076         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4077           if (!coarse_candidates[i]) {
4078             coarse_candidates[ncoarse_cand++]=i;
4079           }
4080         }
4081         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4082 
4083 
4084         if (pcbddc->dbg_flag) {
4085           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4086           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4087           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4088           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4089           for (i=0;i<ncoarse_cand;i++) {
4090             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4091           }
4092           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4093           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4094         }
4095         /* shift the pattern on coarse candidates */
4096         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4097         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4098         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4099         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4100         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4101         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4102         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4103       }
4104       if (pcbddc->dbg_flag) {
4105         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4106         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4107         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4108         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4109       }
4110     }
4111     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4112     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4113   } else {
4114     if (pcbddc->dbg_flag) {
4115       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4116       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4117       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4118     }
4119     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4120     coarse_mat_is = t_coarse_mat_is;
4121   }
4122 
4123   /* create local to global scatters for coarse problem */
4124   if (compute_vecs) {
4125     PetscInt lrows;
4126     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4127     if (coarse_mat_is) {
4128       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4129     } else {
4130       lrows = 0;
4131     }
4132     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4133     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4134     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4135     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4136     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4137   }
4138   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4139   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4140 
4141   /* set defaults for coarse KSP and PC */
4142   if (multilevel_allowed) {
4143     coarse_ksp_type = KSPRICHARDSON;
4144     coarse_pc_type = PCBDDC;
4145   } else {
4146     coarse_ksp_type = KSPPREONLY;
4147     coarse_pc_type = PCREDUNDANT;
4148   }
4149 
4150   /* print some info if requested */
4151   if (pcbddc->dbg_flag) {
4152     if (!multilevel_allowed) {
4153       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4154       if (multilevel_requested) {
4155         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4156       } else if (pcbddc->max_levels) {
4157         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4158       }
4159       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4160     }
4161   }
4162 
4163   /* create the coarse KSP object only once with defaults */
4164   if (coarse_mat_is) {
4165     MatReuse coarse_mat_reuse;
4166     PetscViewer dbg_viewer = NULL;
4167     if (pcbddc->dbg_flag) {
4168       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4169       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4170     }
4171     if (!pcbddc->coarse_ksp) {
4172       char prefix[256],str_level[16];
4173       size_t len;
4174       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4175       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4176       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4177       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4178       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4179       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4180       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4181       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4182       /* prefix */
4183       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4184       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4185       if (!pcbddc->current_level) {
4186         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4187         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4188       } else {
4189         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4190         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4191         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4192         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4193         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4194         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4195       }
4196       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4197       /* allow user customization */
4198       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4199       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4200     }
4201 
4202     /* get some info after set from options */
4203     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4204     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4205     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4206     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4207     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4208       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4209       isbddc = PETSC_FALSE;
4210     }
4211     if (isredundant) {
4212       KSP inner_ksp;
4213       PC inner_pc;
4214       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4215       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4216       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4217     }
4218 
4219     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4220     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4221     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4222     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4223     if (nisdofs) {
4224       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4225       for (i=0;i<nisdofs;i++) {
4226         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4227       }
4228     }
4229     if (nisneu) {
4230       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4231       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4232     }
4233 
4234     /* assemble coarse matrix */
4235     if (coarse_reuse) {
4236       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4237       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4238       coarse_mat_reuse = MAT_REUSE_MATRIX;
4239     } else {
4240       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4241     }
4242     if (isbddc || isnn) {
4243       if (pcbddc->coarsening_ratio > 1) {
4244         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4245           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4246           if (pcbddc->dbg_flag) {
4247             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4248             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4249             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4250             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4251           }
4252         }
4253         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4254       } else {
4255         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4256         coarse_mat = coarse_mat_is;
4257       }
4258     } else {
4259       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4260     }
4261     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4262 
4263     /* propagate symmetry info to coarse matrix */
4264     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4265     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4266 
4267     /* set operators */
4268     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4269     if (pcbddc->dbg_flag) {
4270       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4271     }
4272   } else { /* processes non partecipating to coarse solver (if any) */
4273     coarse_mat = 0;
4274   }
4275   ierr = PetscFree(isarray);CHKERRQ(ierr);
4276 #if 0
4277   {
4278     PetscViewer viewer;
4279     char filename[256];
4280     sprintf(filename,"coarse_mat.m");
4281     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4282     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4283     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4284     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4285   }
4286 #endif
4287 
4288   /* Compute coarse null space (special handling by BDDC only) */
4289   if (pcbddc->NullSpace) {
4290     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4291   }
4292 
4293   if (pcbddc->coarse_ksp) {
4294     Vec crhs,csol;
4295     PetscBool ispreonly;
4296     if (CoarseNullSpace) {
4297       if (isbddc) {
4298         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4299       } else {
4300         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4301       }
4302     }
4303     /* setup coarse ksp */
4304     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4305     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4306     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4307     /* hack */
4308     if (!csol) {
4309       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4310     }
4311     if (!crhs) {
4312       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4313     }
4314     /* Check coarse problem if in debug mode or if solving with an iterative method */
4315     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4316     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4317       KSP       check_ksp;
4318       KSPType   check_ksp_type;
4319       PC        check_pc;
4320       Vec       check_vec,coarse_vec;
4321       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4322       PetscInt  its;
4323       PetscBool compute_eigs;
4324       PetscReal *eigs_r,*eigs_c;
4325       PetscInt  neigs;
4326       const char *prefix;
4327 
4328       /* Create ksp object suitable for estimation of extreme eigenvalues */
4329       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4330       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4331       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4332       if (ispreonly) {
4333         check_ksp_type = KSPPREONLY;
4334         compute_eigs = PETSC_FALSE;
4335       } else {
4336         check_ksp_type = KSPGMRES;
4337         compute_eigs = PETSC_TRUE;
4338       }
4339       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4340       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4341       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4342       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4343       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4344       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4345       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4346       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4347       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4348       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4349       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4350       /* create random vec */
4351       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4352       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4353       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4354       if (CoarseNullSpace) {
4355         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4356       }
4357       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4358       /* solve coarse problem */
4359       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4360       if (CoarseNullSpace) {
4361         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4362       }
4363       /* set eigenvalue estimation if preonly has not been requested */
4364       if (compute_eigs) {
4365         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4366         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4367         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4368         lambda_max = eigs_r[neigs-1];
4369         lambda_min = eigs_r[0];
4370         if (pcbddc->use_coarse_estimates) {
4371           if (lambda_max>lambda_min) {
4372             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4373             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4374           }
4375         }
4376       }
4377 
4378       /* check coarse problem residual error */
4379       if (pcbddc->dbg_flag) {
4380         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4381         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4382         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4383         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4384         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4385         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4386         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4387         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4388         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4389         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4390         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4391         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4392         if (compute_eigs) {
4393           PetscReal lambda_max_s,lambda_min_s;
4394           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4395           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4396           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4397           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4398           for (i=0;i<neigs;i++) {
4399             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4400           }
4401         }
4402         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4403         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4404       }
4405       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4406       if (compute_eigs) {
4407         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4408         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4409       }
4410     }
4411   }
4412   /* print additional info */
4413   if (pcbddc->dbg_flag) {
4414     /* waits until all processes reaches this point */
4415     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4416     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4417     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4418   }
4419 
4420   /* free memory */
4421   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4422   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4423   PetscFunctionReturn(0);
4424 }
4425 
4426 #undef __FUNCT__
4427 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4428 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4429 {
4430   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4431   PC_IS*         pcis = (PC_IS*)pc->data;
4432   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4433   PetscInt       i,coarse_size;
4434   PetscInt       *local_primal_indices;
4435   PetscErrorCode ierr;
4436 
4437   PetscFunctionBegin;
4438   /* Compute global number of coarse dofs */
4439   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4440     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4441   }
4442   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4443 
4444   /* check numbering */
4445   if (pcbddc->dbg_flag) {
4446     PetscScalar coarsesum,*array;
4447     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4448 
4449     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4450     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4451     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4452     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4453     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4454     for (i=0;i<pcbddc->local_primal_size;i++) {
4455       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4456     }
4457     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4458     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4459     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4460     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4461     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4462     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4463     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4464     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4465     for (i=0;i<pcis->n;i++) {
4466       if (array[i] == 1.0) {
4467         set_error = PETSC_TRUE;
4468         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4469       }
4470     }
4471     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4472     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4473     for (i=0;i<pcis->n;i++) {
4474       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4475     }
4476     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4477     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4478     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4479     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4480     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4481     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4482     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4483       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4484       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4485       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4486       for (i=0;i<pcbddc->local_primal_size;i++) {
4487         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4488       }
4489       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4490     }
4491     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4492     if (set_error_reduced) {
4493       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4494     }
4495   }
4496   /* get back data */
4497   *coarse_size_n = coarse_size;
4498   *local_primal_indices_n = local_primal_indices;
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "PCBDDCGlobalToLocal"
4504 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4505 {
4506   IS             localis_t;
4507   PetscInt       i,lsize,*idxs,n;
4508   PetscScalar    *vals;
4509   PetscErrorCode ierr;
4510 
4511   PetscFunctionBegin;
4512   /* get indices in local ordering exploiting local to global map */
4513   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4514   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4515   for (i=0;i<lsize;i++) vals[i] = 1.0;
4516   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4517   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4518   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4519   if (idxs) { /* multilevel guard */
4520     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4521   }
4522   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4523   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4524   ierr = PetscFree(vals);CHKERRQ(ierr);
4525   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4526   /* now compute set in local ordering */
4527   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4528   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4529   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4530   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4531   for (i=0,lsize=0;i<n;i++) {
4532     if (PetscRealPart(vals[i]) > 0.5) {
4533       lsize++;
4534     }
4535   }
4536   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4537   for (i=0,lsize=0;i<n;i++) {
4538     if (PetscRealPart(vals[i]) > 0.5) {
4539       idxs[lsize++] = i;
4540     }
4541   }
4542   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4543   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4544   *localis = localis_t;
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4549 #undef __FUNCT__
4550 #define __FUNCT__ "PCBDDCMatMult_Private"
4551 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4552 {
4553   PCBDDCChange_ctx change_ctx;
4554   PetscErrorCode   ierr;
4555 
4556   PetscFunctionBegin;
4557   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4558   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4559   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4560   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4561   PetscFunctionReturn(0);
4562 }
4563 
4564 #undef __FUNCT__
4565 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4566 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4567 {
4568   PCBDDCChange_ctx change_ctx;
4569   PetscErrorCode   ierr;
4570 
4571   PetscFunctionBegin;
4572   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4573   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4574   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4575   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 #undef __FUNCT__
4580 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4581 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4582 {
4583   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4584   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4585   PetscInt            *used_xadj,*used_adjncy;
4586   PetscBool           free_used_adj;
4587   PetscErrorCode      ierr;
4588 
4589   PetscFunctionBegin;
4590   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4591   free_used_adj = PETSC_FALSE;
4592   if (pcbddc->sub_schurs_layers == -1) {
4593     used_xadj = NULL;
4594     used_adjncy = NULL;
4595   } else {
4596     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4597       used_xadj = pcbddc->mat_graph->xadj;
4598       used_adjncy = pcbddc->mat_graph->adjncy;
4599     } else if (pcbddc->computed_rowadj) {
4600       used_xadj = pcbddc->mat_graph->xadj;
4601       used_adjncy = pcbddc->mat_graph->adjncy;
4602     } else {
4603       Mat            mat_adj;
4604       PetscBool      flg_row=PETSC_TRUE;
4605       const PetscInt *xadj,*adjncy;
4606       PetscInt       nvtxs;
4607 
4608       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4609       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4610       if (!flg_row) {
4611         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4612       }
4613       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4614       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4615       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4616       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4617       if (!flg_row) {
4618         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4619       }
4620       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4621       free_used_adj = PETSC_TRUE;
4622     }
4623   }
4624   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->adaptive_invert_Stildas,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4625 
4626   /* free adjacency */
4627   if (free_used_adj) {
4628     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4629   }
4630   PetscFunctionReturn(0);
4631 }
4632 
4633 #undef __FUNCT__
4634 #define __FUNCT__ "PCBDDCInitSubSchurs"
4635 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4636 {
4637   PC_IS               *pcis=(PC_IS*)pc->data;
4638   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4639   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4640   PCBDDCGraph         graph;
4641   Mat                 S_j;
4642   PetscErrorCode      ierr;
4643 
4644   PetscFunctionBegin;
4645   /* attach interface graph for determining subsets */
4646   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4647     IS verticesIS;
4648 
4649     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4650     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4651     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4652     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4653     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4654     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4655 /*
4656     if (pcbddc->dbg_flag) {
4657       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4658     }
4659 */
4660   } else {
4661     graph = pcbddc->mat_graph;
4662   }
4663 
4664   /* Create Schur complement matrix */
4665   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4666   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4667 
4668   /* sub_schurs init */
4669   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4670   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4671   /* free graph struct */
4672   if (pcbddc->sub_schurs_rebuild) {
4673     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4674   }
4675   PetscFunctionReturn(0);
4676 }
4677