xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 08122e4325a89b7e1c4fe454da4f11b2780b06c8)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       n = PetscMax(n,nmin);
66       cum += subset_size*n;
67       cum2 += n;
68       maxneigs = PetscMax(maxneigs,n);
69     }
70   }
71 
72   if (mss) {
73     if (symmetric) {
74       PetscBLASInt B_itype = 1;
75       PetscBLASInt B_N = mss;
76       PetscScalar  zero = 0.0;
77       PetscScalar  eps = 0.0; /* dlamch? */
78 
79       B_lwork = -1;
80       S = NULL;
81       St = NULL;
82       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
83 #if defined(PETSC_USE_COMPLEX)
84       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
85 #else
86       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
87 #endif
88       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
89       ierr = PetscFPTrapPop();CHKERRQ(ierr);
90     } else {
91         /* TODO */
92     }
93   } else {
94     lwork = 0;
95   }
96 
97   nv = 0;
98   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
99     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
100   }
101   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
102   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
103                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
104 #if defined(PETSC_USE_COMPLEX)
105   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
106 #endif
107   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
108   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
109                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
110                       nv+cum,&pcbddc->adaptive_constraints_idxs,
111                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
112   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
113 
114   maxneigs = 0;
115   cum = cum2 = cumarray = 0;
116   if (sub_schurs->is_Ej_com) {
117     const PetscInt *idxs;
118 
119     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
120     for (cum=0;cum<nv;cum++) {
121       pcbddc->adaptive_constraints_n[cum] = 1;
122       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
123       pcbddc->adaptive_constraints_ptrs[cum] = cum;
124       pcbddc->adaptive_constraints_data[cum] = 1.0;
125     }
126     cum2 = cum;
127     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
128   }
129 
130   if (mss) { /* multilevel */
131     if (pcbddc->use_deluxe_scaling) {
132       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
133     } else {
134       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
135     }
136     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
137   }
138 
139   cumsubs = 0;
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (symmetric) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (symmetric) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172 
173         /* ask for eigenvalues lower than thresh */
174         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
175         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
176         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
177 #if defined(PETSC_USE_COMPLEX)
178         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
179 #else
180         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
181 #endif
182         ierr = PetscFPTrapPop();CHKERRQ(ierr);
183         if (B_ierr) {
184           if (B_ierr < 0 ) {
185             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
186           } else if (B_ierr <= B_N) {
187             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
188           } else {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
190           }
191         }
192 
193         if (B_neigs > nmax) {
194           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax);
195           B_neigs = nmax;
196         }
197 
198         if (B_neigs < nmin) {
199           PetscBLASInt B_neigs2;
200 
201           B_IL = B_neigs + 1;
202           ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr);
203           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
204           if (symmetric) {
205             PetscInt j;
206             for (j=0;j<subset_size;j++) {
207               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
208             }
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212           } else {
213             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
215           }
216           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
217 #if defined(PETSC_USE_COMPLEX)
218           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
219 #else
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
221 #endif
222           ierr = PetscFPTrapPop();CHKERRQ(ierr);
223           B_neigs += B_neigs2;
224         }
225         if (B_ierr) {
226           if (B_ierr < 0 ) {
227             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
228           } else if (B_ierr <= B_N) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
230           } else {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
232           }
233         }
234         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
235         for (j=0;j<B_neigs;j++) {
236           if (eigs[j] == 0.0) {
237             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
238           } else {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
240           }
241         }
242       } else {
243           /* TODO */
244       }
245       maxneigs = PetscMax(B_neigs,maxneigs);
246       pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs;
247       cumsubs++;
248 
249       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
250       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
251       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
253       for (j=0;j<B_neigs;j++) {
254 #if 0
255         {
256           PetscBLASInt Blas_N,Blas_one = 1.0;
257           PetscScalar norm;
258           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
259           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
260           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
261             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
262           } else {
263             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
264           }
265           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
266         }
267 #endif
268         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
269         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
270         cum2 += subset_size;
271       }
272       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
273     }
274     /* shift for next computation */
275     cumarray += subset_size*subset_size;
276   }
277   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
278   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
279 
280   if (mss) {
281     if (pcbddc->use_deluxe_scaling) {
282       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
283     } else {
284       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
285     }
286     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
287   }
288   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
289 #if defined(PETSC_USE_COMPLEX)
290   ierr = PetscFree(rwork);CHKERRQ(ierr);
291 #endif
292   if (pcbddc->dbg_flag) {
293     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
294     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "PCBDDCSetUpSolvers"
301 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
302 {
303   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
304   PetscScalar    *coarse_submat_vals;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   /* Setup local scatters R_to_B and (optionally) R_to_D */
309   /* PCBDDCSetUpLocalWorkVectors should be called first! */
310   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
311 
312   /* Setup local neumann solver ksp_R */
313   /* PCBDDCSetUpLocalScatters should be called first! */
314   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
315 
316   /* Change global null space passed in by the user if change of basis has been requested */
317   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
318     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
319   }
320 
321   /*
322      Setup local correction and local part of coarse basis.
323      Gives back the dense local part of the coarse matrix in column major ordering
324   */
325   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
326 
327   /* Compute total number of coarse nodes and setup coarse solver */
328   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
329 
330   /* free */
331   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PCBDDCResetCustomization"
337 PetscErrorCode PCBDDCResetCustomization(PC pc)
338 {
339   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
344   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
345   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
346   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
347   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
348   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
349   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
350   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
351   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
352   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
353   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PCBDDCResetTopography"
359 PetscErrorCode PCBDDCResetTopography(PC pc)
360 {
361   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
362   PetscErrorCode ierr;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
366   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
367   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
368   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
369   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
370   PetscFunctionReturn(0);
371 }
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCBDDCResetSolvers"
375 PetscErrorCode PCBDDCResetSolvers(PC pc)
376 {
377   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
378   PetscErrorCode ierr;
379 
380   PetscFunctionBegin;
381   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
382   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
383   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
384   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
385   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
386   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
387   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
388   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
389   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
390   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
391   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
392   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
393   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
394   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
395   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
396   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
397   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
398   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
399   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
401   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
402   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
403   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
404   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
410 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PC_IS          *pcis = (PC_IS*)pc->data;
414   VecType        impVecType;
415   PetscInt       n_constraints,n_R,old_size;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   if (!pcbddc->ConstraintMatrix) {
420     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
421   }
422   /* get sizes */
423   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
424   n_R = pcis->n-pcbddc->n_actual_vertices;
425   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
426   /* local work vectors (try to avoid unneeded work)*/
427   /* R nodes */
428   old_size = -1;
429   if (pcbddc->vec1_R) {
430     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
431   }
432   if (n_R != old_size) {
433     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
434     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
435     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
436     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
437     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
438     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
439   }
440   /* local primal dofs */
441   old_size = -1;
442   if (pcbddc->vec1_P) {
443     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
444   }
445   if (pcbddc->local_primal_size != old_size) {
446     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
447     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
448     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
449     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
450   }
451   /* local explicit constraints */
452   old_size = -1;
453   if (pcbddc->vec1_C) {
454     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
455   }
456   if (n_constraints && n_constraints != old_size) {
457     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
458     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
459     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
460     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
461   }
462   PetscFunctionReturn(0);
463 }
464 
465 #undef __FUNCT__
466 #define __FUNCT__ "PCBDDCSetUpCorrection"
467 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
468 {
469   PetscErrorCode         ierr;
470   /* pointers to pcis and pcbddc */
471   PC_IS*                 pcis = (PC_IS*)pc->data;
472   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
473   /* submatrices of local problem */
474   Mat                    A_RV,A_VR,A_VV;
475   /* working matrices */
476   Mat                    M1,M2,M3,C_CR;
477   /* working vectors */
478   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
479   /* additional working stuff */
480   IS                     is_aux;
481   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
482   const PetscScalar      *array,*row_cmat_values;
483   const PetscInt         *row_cmat_indices,*idx_R_local;
484   PetscInt               *idx_V_B,*auxindices;
485   PetscInt               n_vertices,n_constraints,size_of_constraint;
486   PetscInt               i,j,n_R,n_D,n_B;
487   PetscBool              unsymmetric_check;
488   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
489   MatType                impMatType;
490   /* some shortcuts to scalars */
491   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
492   /* for debugging purposes */
493   PetscReal              *coarsefunctions_errors,*constraints_errors;
494 
495   PetscFunctionBegin;
496   /* get number of vertices (corners plus constraints with change of basis)
497      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
498   n_vertices = pcbddc->n_actual_vertices;
499   n_constraints = pcbddc->local_primal_size-n_vertices;
500   /* Set Non-overlapping dimensions */
501   n_B = pcis->n_B; n_D = pcis->n - n_B;
502   n_R = pcis->n-n_vertices;
503 
504   /* Set types for local objects needed by BDDC precondtioner */
505   impMatType = MATSEQDENSE;
506 
507   /* Allocating some extra storage just to be safe */
508   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
509   for (i=0;i<pcis->n;i++) auxindices[i]=i;
510 
511   /* vertices in boundary numbering */
512   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
513   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
514   if (i != n_vertices) {
515     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
516   }
517 
518   /* Precompute stuffs needed for preprocessing and application of BDDC*/
519   if (n_constraints) {
520     /* see if we can save some allocations */
521     if (pcbddc->local_auxmat2) {
522       PetscInt on_R,on_constraints;
523       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
524       if (on_R != n_R || on_constraints != n_constraints) {
525         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
526         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
527       }
528     }
529     /* work vectors */
530     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
531     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
532     /* auxiliary matrices */
533     if (!pcbddc->local_auxmat2) {
534       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
535       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
536       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
537       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
538     }
539 
540     /* Extract constraints on R nodes: C_{CR}  */
541     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
542     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
543     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
544 
545     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
546     for (i=0;i<n_constraints;i++) {
547       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
548       /* Get row of constraint matrix in R numbering */
549       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
550       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
551       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
552       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
553       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
554       /* Solve for row of constraint matrix in R numbering */
555       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
556       /* Set values in local_auxmat2 */
557       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
558       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
559       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
560     }
561     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
562     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
563     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
564 
565     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
566     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
567     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
568     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
569     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
570     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
571     ierr = MatSetUp(M1);CHKERRQ(ierr);
572     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
573     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
574     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
575     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
576     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
577     ierr = MatDestroy(&M2);CHKERRQ(ierr);
578     ierr = MatDestroy(&M3);CHKERRQ(ierr);
579     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
580     if (!pcbddc->local_auxmat1) {
581       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
582     } else {
583       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
584     }
585   }
586 
587   /* Get submatrices from subdomain matrix */
588   if (n_vertices) {
589     PetscInt ibs,mbs;
590     PetscBool issbaij;
591     Mat newmat;
592 
593     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
594     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
595     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
596     if (ibs != mbs) { /* need to convert to SEQAIJ */
597       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
598       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
599       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
600       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
601       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
602     } else {
603       /* this is safe */
604       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
605       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
606       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
607         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
608         /* which of the two approaches is faster? */
609         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
610         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
611         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
612         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
613         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
614       } else {
615         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
616         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
617       }
618     }
619     ierr = MatCreateVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
620     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
621     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
622   }
623 
624   /* Matrix of coarse basis functions (local) */
625   if (pcbddc->coarse_phi_B) {
626     PetscInt on_B,on_primal;
627     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
628     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
629       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
630       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
631     }
632   }
633   if (pcbddc->coarse_phi_D) {
634     PetscInt on_D,on_primal;
635     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
636     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
637       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
638       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
639     }
640   }
641   if (!pcbddc->coarse_phi_B) {
642     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
643     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
644     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
645     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
646   }
647   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
648     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
649     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
650     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
651     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
652   }
653 
654   if (pcbddc->dbg_flag) {
655     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
656     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&coarsefunctions_errors);CHKERRQ(ierr);
657     ierr = PetscMalloc1(2*pcbddc->local_primal_size,&constraints_errors);CHKERRQ(ierr);
658   }
659   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
660   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
661 
662   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
663 
664   /* vertices */
665   for (i=0;i<n_vertices;i++) {
666     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
667     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
668     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
669     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
670     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
671     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
672     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
673     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
674     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
675     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
676     if (n_constraints) {
677       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
678       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
679       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
680     }
681     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
682     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
683 
684     /* Set values in coarse basis function and subdomain part of coarse_mat */
685     /* coarse basis functions */
686     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
687     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
688     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
689     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
690     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
691     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
692     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
693     if (pcbddc->switch_static || pcbddc->dbg_flag) {
694       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
697       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
698       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
699     }
700     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
701     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
702     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
703     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
704     if (n_constraints) {
705       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
706       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
707       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
708     }
709 
710     /* check */
711     if (pcbddc->dbg_flag) {
712       /* assemble subdomain vector on local nodes */
713       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
714       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
715       if (n_R) {
716         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
717       }
718       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
719       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
720       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
721       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
722       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
723       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
724       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
725       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
726       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
727       if (n_constraints) {
728         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
729         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
730         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
731       }
732       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
733       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
734       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
735       /* check saddle point solution */
736       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
737       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
738       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
739       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
740       /* shift by the identity matrix */
741       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
742       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
743       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
744       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
745     }
746   }
747 
748   /* constraints */
749   for (i=0;i<n_constraints;i++) {
750     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
751     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
752     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
753     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
754     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
755     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
756     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
757     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
758     if (n_vertices) {
759       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
760     }
761     /* Set values in coarse basis function and subdomain part of coarse_mat */
762     /* coarse basis functions */
763     j = i+n_vertices; /* don't touch this! */
764     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
765     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
766     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
767     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
768     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
769     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
770     if (pcbddc->switch_static || pcbddc->dbg_flag) {
771       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
772       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
773       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
774       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
775       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
776     }
777     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
778     if (n_vertices) {
779       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
780       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
781       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
782     }
783     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
784     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
785     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
786 
787     if (pcbddc->dbg_flag) {
788       /* assemble subdomain vector on nodes */
789       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
790       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
791       if (n_R) {
792         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
793       }
794       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
795       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
796       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
797       /* assemble subdomain vector of lagrange multipliers */
798       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
799       if (n_vertices) {
800         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
801         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
802         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
803       }
804       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
805       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
806       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
807       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
808       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
809       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
810       /* check saddle point solution */
811       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
812       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
813       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
814       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
815       /* shift by the identity matrix */
816       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
817       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
818       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
819       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
820     }
821   }
822   /* call assembling routines for local coarse basis */
823   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
825   if (pcbddc->switch_static || pcbddc->dbg_flag) {
826     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
827     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828   }
829 
830   /* compute other basis functions for non-symmetric problems */
831   if (!pcbddc->issym) {
832     if (!pcbddc->coarse_psi_B) {
833       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
834       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
835       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
836       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
837     }
838     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
839       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
840       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
841       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
842       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
843     }
844     for (i=0;i<pcbddc->local_primal_size;i++) {
845       if (n_constraints) {
846         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
847         for (j=0;j<n_constraints;j++) {
848           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
849         }
850         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
851         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
852       }
853       if (i<n_vertices) {
854         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
855         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
856         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
857         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
858         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
859         if (n_constraints) {
860           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
861         }
862       } else {
863         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
864       }
865       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
866       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
867       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
870       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
871       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
872       if (i<n_vertices) {
873         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
874       }
875       if (pcbddc->switch_static || pcbddc->dbg_flag) {
876         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
879         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
880         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
881       }
882 
883       if (pcbddc->dbg_flag) {
884         /* assemble subdomain vector on nodes */
885         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
886         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
887         if (n_R) {
888           ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
889         }
890         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
891         if (i<n_vertices) {
892           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
893         }
894         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
895         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
896         /* assemble subdomain vector of lagrange multipliers */
897         for (j=0;j<pcbddc->local_primal_size;j++) {
898           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
899         }
900         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
901         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
902         /* check saddle point solution */
903         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
904         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
905         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
906         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
907         /* shift by the identity matrix */
908         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
909         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
910         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
911         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
912       }
913     }
914     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
915     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
916     if (pcbddc->switch_static || pcbddc->dbg_flag) {
917       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
918       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
919     }
920     unsymmetric_check = PETSC_TRUE;
921   } else { /* take references to already computed coarse basis */
922     unsymmetric_check = PETSC_FALSE;
923     ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
924     pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
925     if (pcbddc->coarse_phi_D) {
926       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
927       pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
928     }
929   }
930   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
931   /* Checking coarse_sub_mat and coarse basis functios */
932   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
933   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
934   if (pcbddc->dbg_flag) {
935     Mat         coarse_sub_mat;
936     Mat         AUXMAT,TM1,TM2,TM3,TM4;
937     Mat         coarse_phi_D,coarse_phi_B;
938     Mat         coarse_psi_D,coarse_psi_B;
939     Mat         A_II,A_BB,A_IB,A_BI;
940     MatType     checkmattype=MATSEQAIJ;
941     PetscReal   real_value;
942 
943     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
944     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
945     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
946     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
947     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
948     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
949     if (unsymmetric_check) {
950       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
951       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
952     }
953     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
954 
955     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
956     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
957     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
958     if (unsymmetric_check) {
959       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
960       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
961       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
962       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
963       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
964       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
965       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
966       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
967       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
968       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
969       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
970       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
971     } else {
972       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
973       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
974       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
975       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
976       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
977       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
978       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
979       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
980     }
981     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
982     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
983     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
984     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
985     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
986     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
987     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
988     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
989     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
990     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
991     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
992     for (i=0;i<pcbddc->local_primal_size;i++) {
993       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
994     }
995     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
996     for (i=0;i<pcbddc->local_primal_size;i++) {
997       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
998     }
999     if (unsymmetric_check) {
1000       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
1001       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
1002         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
1003       }
1004       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
1005       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
1006         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
1007       }
1008     }
1009     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1010     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1011     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1012     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1013     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1014     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1015     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1016     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1017     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1018     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1019     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1020     if (unsymmetric_check) {
1021       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1022       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1023     }
1024     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1025     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
1026     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
1027     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
1028   }
1029   /* free memory */
1030   if (n_vertices) {
1031     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
1032     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
1033     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1034     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1035     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1036   }
1037   if (n_constraints) {
1038     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
1039     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
1040     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1041     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1042   }
1043   ierr = PetscFree(auxindices);CHKERRQ(ierr);
1044   /* get back data */
1045   *coarse_submat_vals_n = coarse_submat_vals;
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1051 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1052 {
1053   Mat            *work_mat;
1054   IS             isrow_s,iscol_s;
1055   PetscBool      rsorted,csorted;
1056   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1061   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1062   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1063   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1064 
1065   if (!rsorted) {
1066     const PetscInt *idxs;
1067     PetscInt *idxs_sorted,i;
1068 
1069     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1070     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1071     for (i=0;i<rsize;i++) {
1072       idxs_perm_r[i] = i;
1073     }
1074     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1075     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1076     for (i=0;i<rsize;i++) {
1077       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1078     }
1079     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1080     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1081   } else {
1082     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1083     isrow_s = isrow;
1084   }
1085 
1086   if (!csorted) {
1087     if (isrow == iscol) {
1088       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1089       iscol_s = isrow_s;
1090     } else {
1091       const PetscInt *idxs;
1092       PetscInt *idxs_sorted,i;
1093 
1094       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1095       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1096       for (i=0;i<csize;i++) {
1097         idxs_perm_c[i] = i;
1098       }
1099       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1100       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1101       for (i=0;i<csize;i++) {
1102         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1103       }
1104       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1105       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1106     }
1107   } else {
1108     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1109     iscol_s = iscol;
1110   }
1111 
1112   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1113 
1114   if (!rsorted || !csorted) {
1115     Mat      new_mat;
1116     IS       is_perm_r,is_perm_c;
1117 
1118     if (!rsorted) {
1119       PetscInt *idxs_r,i;
1120       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1121       for (i=0;i<rsize;i++) {
1122         idxs_r[idxs_perm_r[i]] = i;
1123       }
1124       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1125       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1126     } else {
1127       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1128     }
1129     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1130 
1131     if (!csorted) {
1132       if (isrow_s == iscol_s) {
1133         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1134         is_perm_c = is_perm_r;
1135       } else {
1136         PetscInt *idxs_c,i;
1137         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1138         for (i=0;i<csize;i++) {
1139           idxs_c[idxs_perm_c[i]] = i;
1140         }
1141         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1142         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1143       }
1144     } else {
1145       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1146     }
1147     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1148 
1149     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1150     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1151     work_mat[0] = new_mat;
1152     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1153     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1154   }
1155 
1156   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1157   *B = work_mat[0];
1158   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1159   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1160   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1166 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1167 {
1168   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1169   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1170   Mat            new_mat;
1171   IS             is_local,is_global;
1172   PetscInt       local_size;
1173   PetscBool      isseqaij;
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1178   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1179   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1180   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1181   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1182   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1183   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1184 
1185   /* check */
1186   if (pcbddc->dbg_flag) {
1187     Vec       x,x_change;
1188     PetscReal error;
1189 
1190     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1191     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1192     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1193     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1194     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1195     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1196     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1197     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1198     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1199     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1200     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1201     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1202     ierr = VecDestroy(&x);CHKERRQ(ierr);
1203     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1204   }
1205 
1206   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1207   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1208   if (isseqaij) {
1209     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1210   } else {
1211     Mat work_mat;
1212     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1213     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1214     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1215   }
1216   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1217   /*
1218   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1219   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1220   */
1221   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1227 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1228 {
1229   PC_IS*         pcis = (PC_IS*)(pc->data);
1230   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1231   IS             is_aux1,is_aux2;
1232   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1233   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1234   PetscInt       vbs,bs;
1235   PetscBT        bitmask;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   /*
1240     No need to setup local scatters if
1241       - primal space is unchanged
1242         AND
1243       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1244         AND
1245       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1246   */
1247   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1248     PetscFunctionReturn(0);
1249   }
1250   /* destroy old objects */
1251   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1252   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1253   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1254   /* Set Non-overlapping dimensions */
1255   n_B = pcis->n_B; n_D = pcis->n - n_B;
1256   n_vertices = pcbddc->n_actual_vertices;
1257   /* create auxiliary bitmask */
1258   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1259   for (i=0;i<n_vertices;i++) {
1260     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1261   }
1262 
1263   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1264   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1265   for (i=0, n_R=0; i<pcis->n; i++) {
1266     if (!PetscBTLookup(bitmask,i)) {
1267       idx_R_local[n_R] = i;
1268       n_R++;
1269     }
1270   }
1271 
1272   /* Block code */
1273   vbs = 1;
1274   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1275   if (bs>1 && !(n_vertices%bs)) {
1276     PetscBool is_blocked = PETSC_TRUE;
1277     PetscInt  *vary;
1278     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1279     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1280     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1281     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1282     for (i=0; i<n_vertices; i++) {
1283       if (vary[i]!=0 && vary[i]!=bs) {
1284         is_blocked = PETSC_FALSE;
1285         break;
1286       }
1287     }
1288     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1289       vbs = bs;
1290       for (i=0;i<n_R/vbs;i++) {
1291         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1292       }
1293     }
1294     ierr = PetscFree(vary);CHKERRQ(ierr);
1295   }
1296   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1297   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1298 
1299   /* print some info if requested */
1300   if (pcbddc->dbg_flag) {
1301     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1302     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1303     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1304     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1305     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1306     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1307     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1309   }
1310 
1311   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1312   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1313   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1314   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1315   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1316   for (i=0; i<n_D; i++) {
1317     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1318   }
1319   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1320   for (i=0, j=0; i<n_R; i++) {
1321     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1322       aux_array1[j++] = i;
1323     }
1324   }
1325   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1326   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1327   for (i=0, j=0; i<n_B; i++) {
1328     if (!PetscBTLookup(bitmask,is_indices[i])) {
1329       aux_array2[j++] = i;
1330     }
1331   }
1332   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1333   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1334   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1335   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1336   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1337 
1338   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1339     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1340     for (i=0, j=0; i<n_R; i++) {
1341       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1342         aux_array1[j++] = i;
1343       }
1344     }
1345     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1346     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1347     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1348   }
1349   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1350   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1357 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1358 {
1359   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1360   PC_IS          *pcis = (PC_IS*)pc->data;
1361   PC             pc_temp;
1362   Mat            A_RR;
1363   MatReuse       reuse;
1364   PetscScalar    m_one = -1.0;
1365   PetscReal      value;
1366   PetscInt       n_D,n_R,ibs,mbs;
1367   PetscBool      use_exact,use_exact_reduced,issbaij;
1368   PetscErrorCode ierr;
1369   /* prefixes stuff */
1370   char           dir_prefix[256],neu_prefix[256],str_level[16];
1371   size_t         len;
1372 
1373   PetscFunctionBegin;
1374 
1375   /* compute prefixes */
1376   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1377   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1378   if (!pcbddc->current_level) {
1379     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1380     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1381     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1382     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1383   } else {
1384     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1385     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1386     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1387     len -= 15; /* remove "pc_bddc_coarse_" */
1388     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1389     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1390     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1391     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1392     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1393     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1394     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1395     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1396   }
1397 
1398   /* DIRICHLET PROBLEM */
1399   if (dirichlet) {
1400     /* Matrix for Dirichlet problem is pcis->A_II */
1401     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1402     if (!pcbddc->ksp_D) { /* create object if not yet build */
1403       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1404       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1405       /* default */
1406       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1407       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1408       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1409       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1410       if (issbaij) {
1411         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1412       } else {
1413         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1414       }
1415       /* Allow user's customization */
1416       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1417       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1418     }
1419     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1420     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1421     if (!n_D) {
1422       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1423       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1424     }
1425     /* Set Up KSP for Dirichlet problem of BDDC */
1426     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1427     /* set ksp_D into pcis data */
1428     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1429     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1430     pcis->ksp_D = pcbddc->ksp_D;
1431   }
1432 
1433   /* NEUMANN PROBLEM */
1434   A_RR = 0;
1435   if (neumann) {
1436     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1437     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1438     if (pcbddc->ksp_R) { /* already created ksp */
1439       PetscInt nn_R;
1440       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1441       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1442       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1443       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1444         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1445         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1446         reuse = MAT_INITIAL_MATRIX;
1447       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1448         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1449           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1450           reuse = MAT_INITIAL_MATRIX;
1451         } else { /* safe to reuse the matrix */
1452           reuse = MAT_REUSE_MATRIX;
1453         }
1454       }
1455       /* last check */
1456       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1457         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1458         reuse = MAT_INITIAL_MATRIX;
1459       }
1460     } else { /* first time, so we need to create the matrix */
1461       reuse = MAT_INITIAL_MATRIX;
1462     }
1463     /* extract A_RR */
1464     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1465     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1466     if (ibs != mbs) {
1467       Mat newmat;
1468       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1469       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1470       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1471     } else {
1472       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1473     }
1474     if (!pcbddc->ksp_R) { /* create object if not present */
1475       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1476       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1477       /* default */
1478       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1479       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1480       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1481       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1482       if (issbaij) {
1483         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1484       } else {
1485         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1486       }
1487       /* Allow user's customization */
1488       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1489       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1490     }
1491     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1492     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1493     if (!n_R) {
1494       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1495       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1496     }
1497     /* Set Up KSP for Neumann problem of BDDC */
1498     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1499   }
1500 
1501   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1502   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1503     if (pcbddc->dbg_flag) {
1504       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1505       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1506       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1507     }
1508     if (dirichlet) { /* Dirichlet */
1509       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1510       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1511       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1512       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1513       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1514       /* need to be adapted? */
1515       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1516       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1517       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1518       /* print info */
1519       if (pcbddc->dbg_flag) {
1520         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1521         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1522       }
1523       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1524         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1525       }
1526     }
1527     if (neumann) { /* Neumann */
1528       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1529       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1530       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1531       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1532       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1533       /* need to be adapted? */
1534       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1535       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1536       /* print info */
1537       if (pcbddc->dbg_flag) {
1538         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1539         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       }
1541       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1542         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1543       }
1544     }
1545   }
1546   /* free Neumann problem's matrix */
1547   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1553 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1554 {
1555   PetscErrorCode ierr;
1556   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1557 
1558   PetscFunctionBegin;
1559   if (applytranspose) {
1560     if (pcbddc->local_auxmat1) {
1561       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1562       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1563     }
1564     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1565   } else {
1566     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1567     if (pcbddc->local_auxmat1) {
1568       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1569       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1570     }
1571   }
1572   PetscFunctionReturn(0);
1573 }
1574 
1575 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1576 #undef __FUNCT__
1577 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1578 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1579 {
1580   PetscErrorCode ierr;
1581   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1582   PC_IS*            pcis = (PC_IS*)  (pc->data);
1583   const PetscScalar zero = 0.0;
1584 
1585   PetscFunctionBegin;
1586   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1587   if (applytranspose) {
1588     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1589     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1590   } else {
1591     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1592     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1593   }
1594   /* start communications from local primal nodes to rhs of coarse solver */
1595   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1596   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1597   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1598 
1599   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1600   /* TODO remove null space when doing multilevel */
1601   if (pcbddc->coarse_ksp) {
1602     if (applytranspose) {
1603       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1604     } else {
1605       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1606     }
1607   }
1608 
1609   /* Local solution on R nodes */
1610   if (pcis->n) {
1611     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1612     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1614     if (pcbddc->switch_static) {
1615       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1616       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1617     }
1618     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1619     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1620     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1621     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1622     if (pcbddc->switch_static) {
1623       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625     }
1626   }
1627 
1628   /* communications from coarse sol to local primal nodes */
1629   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1630   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1631 
1632   /* Sum contributions from two levels */
1633   if (applytranspose) {
1634     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1635     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1636   } else {
1637     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1638     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1644 #undef __FUNCT__
1645 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1646 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1647 {
1648   PetscErrorCode ierr;
1649   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1650   PetscScalar    *array,*array2;
1651   Vec            from,to;
1652 
1653   PetscFunctionBegin;
1654   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1655     from = pcbddc->coarse_vec;
1656     to = pcbddc->vec1_P;
1657     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1658       Vec tvec;
1659       PetscInt lsize;
1660       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1661       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1662       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1663       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1664       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1665       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1666       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1667     }
1668   } else { /* from local to global -> put data in coarse right hand side */
1669     from = pcbddc->vec1_P;
1670     to = pcbddc->coarse_vec;
1671   }
1672   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1678 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1679 {
1680   PetscErrorCode ierr;
1681   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1682   PetscScalar    *array,*array2;
1683   Vec            from,to;
1684 
1685   PetscFunctionBegin;
1686   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1687     from = pcbddc->coarse_vec;
1688     to = pcbddc->vec1_P;
1689   } else { /* from local to global -> put data in coarse right hand side */
1690     from = pcbddc->vec1_P;
1691     to = pcbddc->coarse_vec;
1692   }
1693   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1694   if (smode == SCATTER_FORWARD) {
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697       PetscInt lsize;
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1700       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1702       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1703       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1704       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1705     }
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 /* uncomment for testing purposes */
1711 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1712 #undef __FUNCT__
1713 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1714 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1715 {
1716   PetscErrorCode    ierr;
1717   PC_IS*            pcis = (PC_IS*)(pc->data);
1718   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1719   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1720   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1721   MatType           impMatType=MATSEQAIJ;
1722   /* one and zero */
1723   PetscScalar       one=1.0,zero=0.0;
1724   /* space to store constraints and their local indices */
1725   PetscScalar       *temp_quadrature_constraint;
1726   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1727   /* iterators */
1728   PetscInt          i,j,k,total_counts,temp_start_ptr;
1729   /* stuff to store connected components stored in pcbddc->mat_graph */
1730   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1731   PetscInt          n_ISForFaces,n_ISForEdges;
1732   /* near null space stuff */
1733   MatNullSpace      nearnullsp;
1734   const Vec         *nearnullvecs;
1735   Vec               *localnearnullsp;
1736   PetscBool         nnsp_has_cnst;
1737   PetscInt          nnsp_size;
1738   PetscScalar       *array;
1739   /* BLAS integers */
1740   PetscBLASInt      lwork,lierr;
1741   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1742   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1743   /* LAPACK working arrays for SVD or POD */
1744   PetscBool         skip_lapack;
1745   PetscScalar       *work;
1746   PetscReal         *singular_vals;
1747 #if defined(PETSC_USE_COMPLEX)
1748   PetscReal         *rwork;
1749 #endif
1750 #if defined(PETSC_MISSING_LAPACK_GESVD)
1751   PetscBLASInt      Blas_one_2=1;
1752   PetscScalar       *temp_basis,*correlation_mat;
1753 #else
1754   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1755   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1756 #endif
1757   /* reuse */
1758   PetscInt          olocal_primal_size;
1759   PetscInt          *oprimal_indices_local_idxs;
1760   /* change of basis */
1761   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1762   PetscBool         boolforchange,qr_needed;
1763   PetscBT           touched,change_basis,qr_needed_idx;
1764   /* auxiliary stuff */
1765   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1766   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1767   PetscScalar       *temp_quadrature_constraint_work;
1768   /* some quantities */
1769   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1770   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1771 
1772 
1773   PetscFunctionBegin;
1774   /* Destroy Mat objects computed previously */
1775   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1776   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1777   /* TODO synch with adaptive selection */
1778   /* Get index sets for faces, edges and vertices from graph */
1779   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1780   /* free unneeded index sets */
1781   if (!pcbddc->use_vertices) {
1782     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1783   }
1784   if (!pcbddc->use_edges) {
1785     for (i=0;i<n_ISForEdges;i++) {
1786       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1787     }
1788     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1789     n_ISForEdges = 0;
1790   }
1791   if (!pcbddc->use_faces) {
1792     for (i=0;i<n_ISForFaces;i++) {
1793       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1794     }
1795     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1796     n_ISForFaces = 0;
1797   }
1798   /* HACKS (the following two blocks of code) */
1799   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1800     pcbddc->use_change_of_basis = PETSC_TRUE;
1801     if (!ISForEdges) {
1802       pcbddc->use_change_on_faces = PETSC_TRUE;
1803     }
1804   }
1805   if (pcbddc->NullSpace) {
1806     /* use_change_of_basis should be consistent among processors */
1807     PetscBool tbool[2],gbool[2];
1808     tbool [0] = pcbddc->use_change_of_basis;
1809     tbool [1] = pcbddc->use_change_on_faces;
1810     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1811     pcbddc->use_change_of_basis = gbool[0];
1812     pcbddc->use_change_on_faces = gbool[1];
1813   }
1814   /* print some info */
1815   if (pcbddc->dbg_flag) {
1816     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1818     i = 0;
1819     if (ISForVertices) {
1820       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1821     }
1822     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1823     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1824     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1825     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1826   }
1827 
1828   if (!pcbddc->adaptive_selection) {
1829     /* check if near null space is attached to global mat */
1830     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1831     if (nearnullsp) {
1832       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1833       /* remove any stored info */
1834       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1835       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1836       /* store information for BDDC solver reuse */
1837       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1838       pcbddc->onearnullspace = nearnullsp;
1839       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1840       for (i=0;i<nnsp_size;i++) {
1841         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1842       }
1843     } else { /* if near null space is not provided BDDC uses constants by default */
1844       nnsp_size = 0;
1845       nnsp_has_cnst = PETSC_TRUE;
1846     }
1847     /* get max number of constraints on a single cc */
1848     max_constraints = nnsp_size;
1849     if (nnsp_has_cnst) max_constraints++;
1850 
1851     /*
1852          Evaluate maximum storage size needed by the procedure
1853          - temp_indices will contain start index of each constraint stored as follows
1854          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1855          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1856          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1857                                                                                                                                                            */
1858     total_counts = n_ISForFaces+n_ISForEdges;
1859     total_counts *= max_constraints;
1860     n_vertices = 0;
1861     if (ISForVertices) {
1862       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1863     }
1864     total_counts += n_vertices;
1865     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1866     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1867     total_counts = 0;
1868     max_size_of_constraint = 0;
1869     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1870       if (i<n_ISForEdges) {
1871         used_IS = &ISForEdges[i];
1872       } else {
1873         used_IS = &ISForFaces[i-n_ISForEdges];
1874       }
1875       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1876       total_counts += j;
1877       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1878     }
1879     total_counts *= max_constraints;
1880     total_counts += n_vertices;
1881     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1882     /* get local part of global near null space vectors */
1883     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1884     for (k=0;k<nnsp_size;k++) {
1885       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1886       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1887       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1888     }
1889 
1890     /* whether or not to skip lapack calls */
1891     skip_lapack = PETSC_TRUE;
1892     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1893 
1894     /* allocate some auxiliary stuff */
1895     if (!skip_lapack || pcbddc->use_qr_single) {
1896       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1897     }
1898 
1899     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1900     if (!skip_lapack) {
1901       PetscScalar temp_work;
1902 
1903 #if defined(PETSC_MISSING_LAPACK_GESVD)
1904       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1905       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1906       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1907       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1908 #if defined(PETSC_USE_COMPLEX)
1909       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1910 #endif
1911       /* now we evaluate the optimal workspace using query with lwork=-1 */
1912       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1913       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1914       lwork = -1;
1915       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1916 #if !defined(PETSC_USE_COMPLEX)
1917       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1918 #else
1919       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1920 #endif
1921       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1922       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1923 #else /* on missing GESVD */
1924       /* SVD */
1925       PetscInt max_n,min_n;
1926       max_n = max_size_of_constraint;
1927       min_n = max_constraints;
1928       if (max_size_of_constraint < max_constraints) {
1929         min_n = max_size_of_constraint;
1930         max_n = max_constraints;
1931       }
1932       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1933 #if defined(PETSC_USE_COMPLEX)
1934       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1935 #endif
1936       /* now we evaluate the optimal workspace using query with lwork=-1 */
1937       lwork = -1;
1938       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1939       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1940       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1941       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1942 #if !defined(PETSC_USE_COMPLEX)
1943       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1944 #else
1945       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1946 #endif
1947       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1948       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1949 #endif /* on missing GESVD */
1950       /* Allocate optimal workspace */
1951       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1952       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1953     }
1954     /* Now we can loop on constraining sets */
1955     total_counts = 0;
1956     temp_indices[0] = 0;
1957     /* vertices */
1958     if (ISForVertices) {
1959       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1960       if (nnsp_has_cnst) { /* consider all vertices */
1961         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1962         for (i=0;i<n_vertices;i++) {
1963           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1964           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1965           total_counts++;
1966         }
1967       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1968         PetscBool used_vertex;
1969         for (i=0;i<n_vertices;i++) {
1970           used_vertex = PETSC_FALSE;
1971           k = 0;
1972           while (!used_vertex && k<nnsp_size) {
1973             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1974             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1975               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1976               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1977               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1978               total_counts++;
1979               used_vertex = PETSC_TRUE;
1980             }
1981             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1982             k++;
1983           }
1984         }
1985       }
1986       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1987       n_vertices = total_counts;
1988     }
1989 
1990     /* edges and faces */
1991     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1992       if (ncc<n_ISForEdges) {
1993         used_IS = &ISForEdges[ncc];
1994         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1995       } else {
1996         used_IS = &ISForFaces[ncc-n_ISForEdges];
1997         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1998       }
1999       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2000       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2001       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2002       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2003       /* change of basis should not be performed on local periodic nodes */
2004       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2005       if (nnsp_has_cnst) {
2006         PetscScalar quad_value;
2007         temp_constraints++;
2008         if (!pcbddc->use_nnsp_true) {
2009           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2010         } else {
2011           quad_value = 1.0;
2012         }
2013         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2014         for (j=0;j<size_of_constraint;j++) {
2015           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2016         }
2017         /* sort by global ordering if using lapack subroutines (not needed!) */
2018         if (!skip_lapack || pcbddc->use_qr_single) {
2019           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2020           for (j=0;j<size_of_constraint;j++) {
2021             permutation[j]=j;
2022           }
2023           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2024           for (j=0;j<size_of_constraint;j++) {
2025             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2026           }
2027           for (j=0;j<size_of_constraint;j++) {
2028             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2029             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2030           }
2031           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2032           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2033         }
2034         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2035         total_counts++;
2036       }
2037       for (k=0;k<nnsp_size;k++) {
2038         PetscReal real_value;
2039         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2040         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2041         for (j=0;j<size_of_constraint;j++) {
2042           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2043         }
2044         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2045         /* check if array is null on the connected component */
2046         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2047         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2048         if (real_value > 0.0) { /* keep indices and values */
2049           /* sort by global ordering if using lapack subroutines */
2050           if (!skip_lapack || pcbddc->use_qr_single) {
2051             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2052             for (j=0;j<size_of_constraint;j++) {
2053               permutation[j]=j;
2054             }
2055             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2056             for (j=0;j<size_of_constraint;j++) {
2057               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2058               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2059             }
2060             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2061             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2062           }
2063           temp_constraints++;
2064           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2065           total_counts++;
2066         }
2067       }
2068       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2069       valid_constraints = temp_constraints;
2070       if (!pcbddc->use_nnsp_true && temp_constraints) {
2071         if (temp_constraints == 1) { /* just normalize the constraint */
2072           PetscScalar norm;
2073           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2074           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2075           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2076           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2077         } else { /* perform SVD */
2078           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2079 
2080 #if defined(PETSC_MISSING_LAPACK_GESVD)
2081           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2082              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2083              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2084                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2085                 from that computed using LAPACKgesvd
2086              -> This is due to a different computation of eigenvectors in LAPACKheev
2087              -> The quality of the POD-computed basis will be the same */
2088           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2089           /* Store upper triangular part of correlation matrix */
2090           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2091           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2092           for (j=0;j<temp_constraints;j++) {
2093             for (k=0;k<j+1;k++) {
2094               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2095             }
2096           }
2097           /* compute eigenvalues and eigenvectors of correlation matrix */
2098           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2099           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2100 #if !defined(PETSC_USE_COMPLEX)
2101           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2102 #else
2103           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2104 #endif
2105           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2106           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2107           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2108           j = 0;
2109           while (j < temp_constraints && singular_vals[j] < tol) j++;
2110           total_counts = total_counts-j;
2111           valid_constraints = temp_constraints-j;
2112           /* scale and copy POD basis into used quadrature memory */
2113           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2114           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2115           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2116           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2117           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2118           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2119           if (j<temp_constraints) {
2120             PetscInt ii;
2121             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2122             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2123             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2124             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2125             for (k=0;k<temp_constraints-j;k++) {
2126               for (ii=0;ii<size_of_constraint;ii++) {
2127                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2128               }
2129             }
2130           }
2131 #else  /* on missing GESVD */
2132           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2133           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2134           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2135           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2136 #if !defined(PETSC_USE_COMPLEX)
2137           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2138 #else
2139           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2140 #endif
2141           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2142           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2143           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2144           k = temp_constraints;
2145           if (k > size_of_constraint) k = size_of_constraint;
2146           j = 0;
2147           while (j < k && singular_vals[k-j-1] < tol) j++;
2148           valid_constraints = k-j;
2149           total_counts = total_counts-temp_constraints+valid_constraints;
2150 #endif /* on missing GESVD */
2151         }
2152       }
2153       /* setting change_of_basis flag is safe now */
2154       if (boolforchange) {
2155         for (j=0;j<valid_constraints;j++) {
2156           PetscBTSet(change_basis,total_counts-j-1);
2157         }
2158       }
2159     }
2160     /* free workspace */
2161     if (!skip_lapack || pcbddc->use_qr_single) {
2162       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2163     }
2164     if (!skip_lapack) {
2165       ierr = PetscFree(work);CHKERRQ(ierr);
2166 #if defined(PETSC_USE_COMPLEX)
2167       ierr = PetscFree(rwork);CHKERRQ(ierr);
2168 #endif
2169       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2170 #if defined(PETSC_MISSING_LAPACK_GESVD)
2171       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2172       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2173 #endif
2174     }
2175     for (k=0;k<nnsp_size;k++) {
2176       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2177     }
2178     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2179   } else {
2180     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2181     PetscInt        cum = 0;
2182 
2183     total_counts = 0;
2184     n_vertices = 0;
2185     if (sub_schurs->is_Ej_com) {
2186       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2187     }
2188     max_constraints = 0;
2189     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2190       total_counts += pcbddc->adaptive_constraints_n[i];
2191       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2192     }
2193 
2194     temp_indices = pcbddc->adaptive_constraints_ptrs;
2195     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2196     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2197 
2198 #if 0
2199     printf("Found %d totals\n",total_counts);
2200     for (i=0;i<total_counts;i++) {
2201       printf("const %d, start %d",i,temp_indices[i]);
2202       printf(" end %d:\n",temp_indices[i+1]);
2203       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2204         printf("  idxs %d",temp_indices_to_constraint[j]);
2205         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2206       }
2207     }
2208 #endif
2209 
2210     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2211     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2212     /* Change of basis */
2213     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2214     cum = 0;
2215     if (pcbddc->use_change_of_basis) {
2216       for (i=0;i<sub_schurs->n_subs;i++) {
2217         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2218           for (j=0;j<pcbddc->adaptive_constraints_n[i];j++) {
2219             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2220           }
2221         }
2222         cum += pcbddc->adaptive_constraints_n[i];
2223       }
2224     }
2225   }
2226 
2227   /* free index sets of faces, edges and vertices */
2228   for (i=0;i<n_ISForFaces;i++) {
2229     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2230   }
2231   if (n_ISForFaces) {
2232     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2233   }
2234   for (i=0;i<n_ISForEdges;i++) {
2235     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2236   }
2237   if (n_ISForEdges) {
2238     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2239   }
2240   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2241 
2242   /* map temp_indices_to_constraint in boundary numbering */
2243   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2244   if (i != temp_indices[total_counts]) {
2245     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2246   }
2247 
2248   /* set quantities in pcbddc data structure and store previous primal size */
2249   /* n_vertices defines the number of subdomain corners in the primal space */
2250   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2251   olocal_primal_size = pcbddc->local_primal_size;
2252   pcbddc->local_primal_size = total_counts;
2253   pcbddc->n_vertices = n_vertices;
2254   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2255 
2256   /* Create constraint matrix */
2257   /* The constraint matrix is used to compute the l2g map of primal dofs */
2258   /* so we need to set it up properly either with or without change of basis */
2259   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2260   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2261   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2262   /* array to compute a local numbering of constraints : vertices first then constraints */
2263   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2264   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2265   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2266   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2267   /* auxiliary stuff for basis change */
2268   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2269   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2270 
2271   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2272   total_primal_vertices=0;
2273   for (i=0;i<pcbddc->local_primal_size;i++) {
2274     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2275     if (size_of_constraint == 1) {
2276       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2277       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2278       aux_primal_minloc[total_primal_vertices]=0;
2279       total_primal_vertices++;
2280     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2281       PetscInt min_loc,min_index;
2282       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2283       /* find first untouched local node */
2284       k = 0;
2285       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2286       min_index = global_indices[k];
2287       min_loc = k;
2288       /* search the minimum among global nodes already untouched on the cc */
2289       for (k=1;k<size_of_constraint;k++) {
2290         /* there can be more than one constraint on a single connected component */
2291         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2292           min_index = global_indices[k];
2293           min_loc = k;
2294         }
2295       }
2296       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2297       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2298       aux_primal_minloc[total_primal_vertices]=min_loc;
2299       total_primal_vertices++;
2300     }
2301   }
2302   /* determine if a QR strategy is needed for change of basis */
2303   qr_needed = PETSC_FALSE;
2304   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2305   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2306     if (PetscBTLookup(change_basis,i)) {
2307       if (!pcbddc->use_qr_single) {
2308         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2309         j = 0;
2310         for (k=0;k<size_of_constraint;k++) {
2311           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2312             j++;
2313           }
2314         }
2315         /* found more than one primal dof on the cc */
2316         if (j > 1) {
2317           PetscBTSet(qr_needed_idx,i);
2318           qr_needed = PETSC_TRUE;
2319         }
2320       } else {
2321         PetscBTSet(qr_needed_idx,i);
2322         qr_needed = PETSC_TRUE;
2323       }
2324     }
2325   }
2326   /* free workspace */
2327   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2328 
2329   /* permute indices in order to have a sorted set of vertices */
2330   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2331 
2332   /* nonzero structure of constraint matrix */
2333   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2334   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2335   j=total_primal_vertices;
2336   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2337     if (!PetscBTLookup(change_basis,i)) {
2338       nnz[j]=temp_indices[i+1]-temp_indices[i];
2339       j++;
2340     }
2341   }
2342   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2343   ierr = PetscFree(nnz);CHKERRQ(ierr);
2344   /* set values in constraint matrix */
2345   for (i=0;i<total_primal_vertices;i++) {
2346     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2347   }
2348   total_counts = total_primal_vertices;
2349   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2350     if (!PetscBTLookup(change_basis,i)) {
2351       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2352       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2353       total_counts++;
2354     }
2355   }
2356   /* assembling */
2357   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   /*
2360   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2361   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2362   */
2363   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2364   if (pcbddc->use_change_of_basis) {
2365     /* dual and primal dofs on a single cc */
2366     PetscInt     dual_dofs,primal_dofs;
2367     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2368     PetscInt     primal_counter;
2369     /* working stuff for GEQRF */
2370     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2371     PetscBLASInt lqr_work;
2372     /* working stuff for UNGQR */
2373     PetscScalar  *gqr_work,lgqr_work_t;
2374     PetscBLASInt lgqr_work;
2375     /* working stuff for TRTRS */
2376     PetscScalar  *trs_rhs;
2377     PetscBLASInt Blas_NRHS;
2378     /* pointers for values insertion into change of basis matrix */
2379     PetscInt     *start_rows,*start_cols;
2380     PetscScalar  *start_vals;
2381     /* working stuff for values insertion */
2382     PetscBT      is_primal;
2383     /* matrix sizes */
2384     PetscInt     global_size,local_size;
2385     /* work array for nonzeros */
2386     PetscScalar  *nnz_array;
2387     /* temporary change of basis */
2388     Mat          localChangeOfBasisMatrix;
2389     /* auxiliary work for global change of basis */
2390     Vec          nnz_vec;
2391     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2392     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2393     PetscScalar  *vals;
2394     PetscBool    done;
2395 
2396     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2397     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2398     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2399     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2400 
2401     /* nonzeros for local mat */
2402     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2403     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2404     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2405       if (PetscBTLookup(change_basis,i)) {
2406         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2407         if (PetscBTLookup(qr_needed_idx,i)) {
2408           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2409         } else {
2410           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2411           /* get local primal index on the cc */
2412           j = 0;
2413           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2414           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2415         }
2416       }
2417     }
2418     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2419     /* Set initial identity in the matrix */
2420     for (i=0;i<pcis->n_B;i++) {
2421       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2422     }
2423 
2424     if (pcbddc->dbg_flag) {
2425       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2426       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2427     }
2428 
2429 
2430     /* Now we loop on the constraints which need a change of basis */
2431     /*
2432        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2433        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2434 
2435        Basic blocks of change of basis matrix T computed by
2436 
2437           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2438 
2439             | 1        0   ...        0         s_1/S |
2440             | 0        1   ...        0         s_2/S |
2441             |              ...                        |
2442             | 0        ...            1     s_{n-1}/S |
2443             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2444 
2445             with S = \sum_{i=1}^n s_i^2
2446             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2447                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2448 
2449           - QR decomposition of constraints otherwise
2450     */
2451     if (qr_needed) {
2452       /* space to store Q */
2453       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2454       /* first we issue queries for optimal work */
2455       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2456       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2457       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2458       lqr_work = -1;
2459       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2460       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2461       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2462       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2463       lgqr_work = -1;
2464       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2465       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2466       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2467       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2468       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2469       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2470       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2471       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2472       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2473       /* array to store scaling factors for reflectors */
2474       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2475       /* array to store rhs and solution of triangular solver */
2476       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2477       /* allocating workspace for check */
2478       if (pcbddc->dbg_flag) {
2479         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2480       }
2481     }
2482     /* array to store whether a node is primal or not */
2483     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2484     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2485     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2486     if (i != total_primal_vertices) {
2487       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2488     }
2489     for (i=0;i<total_primal_vertices;i++) {
2490       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2491     }
2492     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2493 
2494     /* loop on constraints and see whether or not they need a change of basis and compute it */
2495     /* -> using implicit ordering contained in temp_indices data */
2496     total_counts = pcbddc->n_vertices;
2497     primal_counter = total_counts;
2498     while (total_counts<pcbddc->local_primal_size) {
2499       primal_dofs = 1;
2500       if (PetscBTLookup(change_basis,total_counts)) {
2501         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2502         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2503           primal_dofs++;
2504         }
2505         /* get constraint info */
2506         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2507         dual_dofs = size_of_constraint-primal_dofs;
2508 
2509         if (pcbddc->dbg_flag) {
2510           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2511         }
2512 
2513         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2514 
2515           /* copy quadrature constraints for change of basis check */
2516           if (pcbddc->dbg_flag) {
2517             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2518           }
2519           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2520           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2521 
2522           /* compute QR decomposition of constraints */
2523           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2526           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2527           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2528           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2529           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2530 
2531           /* explictly compute R^-T */
2532           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2533           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2534           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2535           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2536           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2537           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2538           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2539           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2540           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2541           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2542 
2543           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2544           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2545           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2546           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2547           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2548           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2549           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2550           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2551           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2552 
2553           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2554              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2555              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2556           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2557           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2558           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2559           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2560           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2562           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2563           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2564           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2565           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2566 
2567           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2568           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2569           /* insert cols for primal dofs */
2570           for (j=0;j<primal_dofs;j++) {
2571             start_vals = &qr_basis[j*size_of_constraint];
2572             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2573             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2574           }
2575           /* insert cols for dual dofs */
2576           for (j=0,k=0;j<dual_dofs;k++) {
2577             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2578               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2579               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2580               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2581               j++;
2582             }
2583           }
2584 
2585           /* check change of basis */
2586           if (pcbddc->dbg_flag) {
2587             PetscInt   ii,jj;
2588             PetscBool valid_qr=PETSC_TRUE;
2589             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2590             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2591             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2592             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2593             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2594             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2595             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2596             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2597             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2598             for (jj=0;jj<size_of_constraint;jj++) {
2599               for (ii=0;ii<primal_dofs;ii++) {
2600                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2601                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2602               }
2603             }
2604             if (!valid_qr) {
2605               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2606               for (jj=0;jj<size_of_constraint;jj++) {
2607                 for (ii=0;ii<primal_dofs;ii++) {
2608                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2609                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2610                   }
2611                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2612                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2613                   }
2614                 }
2615               }
2616             } else {
2617               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2618             }
2619           }
2620         } else { /* simple transformation block */
2621           PetscInt    row,col;
2622           PetscScalar val,norm;
2623 
2624           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2625           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2626           for (j=0;j<size_of_constraint;j++) {
2627             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2628             if (!PetscBTLookup(is_primal,row)) {
2629               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2630               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2631               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2632             } else {
2633               for (k=0;k<size_of_constraint;k++) {
2634                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2635                 if (row != col) {
2636                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2637                 } else {
2638                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2639                 }
2640                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2641               }
2642             }
2643           }
2644           if (pcbddc->dbg_flag) {
2645             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2646           }
2647         }
2648         /* increment primal counter */
2649         primal_counter += primal_dofs;
2650       } else {
2651         if (pcbddc->dbg_flag) {
2652           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2653         }
2654       }
2655       /* increment constraint counter total_counts */
2656       total_counts += primal_dofs;
2657     }
2658 
2659     /* free workspace */
2660     if (qr_needed) {
2661       if (pcbddc->dbg_flag) {
2662         ierr = PetscFree(work);CHKERRQ(ierr);
2663       }
2664       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2665       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2666       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2667       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2668       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2669     }
2670     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2671     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2672     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2673 
2674     /* assembling of global change of variable */
2675     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2676     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2677     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2678     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2679     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2680     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2681 
2682     /* nonzeros (overestimated) */
2683     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2684     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2685     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2686     for (i=0;i<pcis->n;i++) {
2687       nnz_array[i] = 1.0;
2688       idxs_all[i] = i;
2689     }
2690     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2691     for (i=0;i<pcis->n_B;i++) {
2692       nnz_array[idxs_B[i]] = nnz[i];
2693     }
2694     if (pcis->n) {
2695       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2696     }
2697     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2698     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2699     ierr = PetscFree(nnz);CHKERRQ(ierr);
2700     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2701     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2702     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2703     for (i=0;i<local_size;i++) {
2704       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2705       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2706     }
2707     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2708     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2709     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2710     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2711 
2712     /* Set identity on dirichlet dofs */
2713     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2714     for (i=0;i<pcis->n-pcis->n_B;i++) {
2715       PetscScalar one=1.0;
2716       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2717     }
2718     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2719 
2720     /* Set values at interface dofs */
2721     done = PETSC_TRUE;
2722     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2723     if (!done) {
2724       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2725     }
2726     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2727     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2728     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2729     for (i=0;i<nvtxs;i++) {
2730       PetscInt    row,*cols,ncols;
2731       PetscScalar *mat_vals;
2732 
2733       row = idxs_B[i];
2734       ncols = xadj[i+1]-xadj[i];
2735       cols = idxs_mapped+xadj[i];
2736       mat_vals = vals+xadj[i];
2737       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2738     }
2739     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2740     if (!done) {
2741       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2742     }
2743     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2744     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2745     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2746     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2748 
2749     /* check */
2750     if (pcbddc->dbg_flag) {
2751       PetscReal error;
2752       Vec       x,x_change;
2753 
2754       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2755       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2756       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2757       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2758       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2759       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2761       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2762       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2763       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2764       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2765       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2766       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2767       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2768       ierr = VecDestroy(&x);CHKERRQ(ierr);
2769       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2770     }
2771 
2772     /* adapt sub_schurs computed (if any) */
2773     if (pcbddc->use_deluxe_scaling) {
2774       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2775       if (sub_schurs->n_subs_par_g) {
2776         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2777       }
2778       if (sub_schurs->S_Ej_all) {
2779         Mat S_1,S_2,tmat;
2780         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,sub_schurs->is_Ej_all,sub_schurs->is_Ej_all,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2781         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2782         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2783         sub_schurs->S_Ej_all = S_1;
2784         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2785         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2786         sub_schurs->sum_S_Ej_all = S_2;
2787         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2788       }
2789     }
2790     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2791   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2792     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2793     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2794   }
2795 
2796   /* set up change of basis context */
2797   if (pcbddc->ChangeOfBasisMatrix) {
2798     PCBDDCChange_ctx change_ctx;
2799 
2800     if (!pcbddc->new_global_mat) {
2801       PetscInt global_size,local_size;
2802 
2803       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2804       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2805       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2806       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2807       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2808       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2809       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2810       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2811       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2812     } else {
2813       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2814       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2815       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2816     }
2817     if (!pcbddc->user_ChangeOfBasisMatrix) {
2818       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2819       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2820     } else {
2821       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2822       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2823     }
2824     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2825     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2826     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2827     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   }
2829 
2830   /* get indices in local ordering for vertices and constraints */
2831   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2832     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2833     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2834   }
2835   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2836   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2837   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2838   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2839   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2840   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2841   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2842   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2843   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2844   /* set quantities in PCBDDC data struct */
2845   pcbddc->n_actual_vertices = i;
2846   /* check if a new primal space has been introduced */
2847   pcbddc->new_primal_space_local = PETSC_TRUE;
2848   if (olocal_primal_size == pcbddc->local_primal_size) {
2849     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2850     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2851     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2852   }
2853   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2854   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2855 
2856   /* flush dbg viewer */
2857   if (pcbddc->dbg_flag) {
2858     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2859   }
2860 
2861   /* free workspace */
2862   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2863   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2864   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2865   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2866   if (!pcbddc->adaptive_selection) {
2867     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2868     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2869   } else {
2870     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2871                       pcbddc->adaptive_constraints_ptrs,
2872                       pcbddc->adaptive_constraints_idxs,
2873                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2874     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2875   }
2876   PetscFunctionReturn(0);
2877 }
2878 
2879 #undef __FUNCT__
2880 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2881 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2882 {
2883   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2884   PC_IS       *pcis = (PC_IS*)pc->data;
2885   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2886   PetscInt    ierr,i,vertex_size;
2887   PetscViewer viewer=pcbddc->dbg_viewer;
2888 
2889   PetscFunctionBegin;
2890   /* Reset previously computed graph */
2891   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2892   /* Init local Graph struct */
2893   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2894 
2895   /* Check validity of the csr graph passed in by the user */
2896   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2897     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2898   }
2899 
2900   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2901   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2902     Mat       mat_adj;
2903     PetscInt  *xadj,*adjncy;
2904     PetscInt  nvtxs;
2905     PetscBool flg_row=PETSC_TRUE;
2906 
2907     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2908     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2909     if (!flg_row) {
2910       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2911     }
2912     if (pcbddc->use_local_adj) {
2913       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2914       pcbddc->computed_rowadj = PETSC_TRUE;
2915     } else { /* just compute subdomain's connected components */
2916       IS                     is_dummy;
2917       ISLocalToGlobalMapping l2gmap_dummy;
2918       PetscInt               j,sum;
2919       PetscInt               *cxadj,*cadjncy;
2920       const PetscInt         *idxs;
2921       PCBDDCGraph            graph;
2922       PetscBT                is_on_boundary;
2923 
2924       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2925       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2926       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2927       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2928       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2929       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2930       graph->xadj = xadj;
2931       graph->adjncy = adjncy;
2932       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2933       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2934 
2935       if (pcbddc->dbg_flag) {
2936         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2937         for (i=0;i<graph->ncc;i++) {
2938           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2939         }
2940         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2941       }
2942 
2943       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2944       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2945       for (i=0;i<pcis->n_B;i++) {
2946         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2947       }
2948       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2949 
2950       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2951       sum = 0;
2952       for (i=0;i<graph->ncc;i++) {
2953         PetscInt sizecc = 0;
2954         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2955           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2956             sizecc++;
2957           }
2958         }
2959         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2960           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2961             cxadj[graph->queue[j]] = sizecc;
2962           }
2963         }
2964         sum += sizecc*sizecc;
2965       }
2966       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2967       sum = 0;
2968       for (i=0;i<nvtxs;i++) {
2969         PetscInt temp = cxadj[i];
2970         cxadj[i] = sum;
2971         sum += temp;
2972       }
2973       cxadj[nvtxs] = sum;
2974       for (i=0;i<graph->ncc;i++) {
2975         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2976           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2977             PetscInt k,sizecc = 0;
2978             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2979               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2980                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2981                 sizecc++;
2982               }
2983             }
2984           }
2985         }
2986       }
2987       if (nvtxs) {
2988         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2989       } else {
2990         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2991         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2992       }
2993       graph->xadj = 0;
2994       graph->adjncy = 0;
2995       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2996       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2997     }
2998     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2999     if (!flg_row) {
3000       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
3001     }
3002     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
3003   }
3004 
3005   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3006   vertex_size = 1;
3007   if (pcbddc->user_provided_isfordofs) {
3008     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3009       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3010       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3011         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3012         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3013       }
3014       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3015       pcbddc->n_ISForDofs = 0;
3016       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3017     }
3018     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3019     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3020   } else {
3021     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3022       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3023       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3024       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3025         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3026       }
3027     }
3028   }
3029 
3030   /* Setup of Graph */
3031   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3032     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3033   }
3034   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3035     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3036   }
3037   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
3038 
3039   /* Graph's connected components analysis */
3040   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3041 
3042   /* print some info to stdout */
3043   if (pcbddc->dbg_flag) {
3044     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3045   }
3046 
3047   /* mark topography has done */
3048   pcbddc->recompute_topography = PETSC_FALSE;
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 #undef __FUNCT__
3053 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3054 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3055 {
3056   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3057   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3058   PetscErrorCode ierr;
3059 
3060   PetscFunctionBegin;
3061   n = 0;
3062   vertices = 0;
3063   if (pcbddc->ConstraintMatrix) {
3064     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3065     for (i=0;i<local_primal_size;i++) {
3066       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3067       if (size_of_constraint == 1) n++;
3068       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3069     }
3070     if (vertices_idx) {
3071       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3072       n = 0;
3073       for (i=0;i<local_primal_size;i++) {
3074         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3075         if (size_of_constraint == 1) {
3076           vertices[n++]=row_cmat_indices[0];
3077         }
3078         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3079       }
3080     }
3081   }
3082   *n_vertices = n;
3083   if (vertices_idx) *vertices_idx = vertices;
3084   PetscFunctionReturn(0);
3085 }
3086 
3087 #undef __FUNCT__
3088 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3089 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3090 {
3091   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3092   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3093   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3094   PetscBT        touched;
3095   PetscErrorCode ierr;
3096 
3097     /* This function assumes that the number of local constraints per connected component
3098        is not greater than the number of nodes defined for the connected component
3099        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3100   PetscFunctionBegin;
3101   n = 0;
3102   constraints_index = 0;
3103   if (pcbddc->ConstraintMatrix) {
3104     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3105     max_size_of_constraint = 0;
3106     for (i=0;i<local_primal_size;i++) {
3107       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3108       if (size_of_constraint > 1) {
3109         n++;
3110       }
3111       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3112       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3113     }
3114     if (constraints_idx) {
3115       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3116       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3117       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3118       n = 0;
3119       for (i=0;i<local_primal_size;i++) {
3120         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3121         if (size_of_constraint > 1) {
3122           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3123           /* find first untouched local node */
3124           j = 0;
3125           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3126           min_index = row_cmat_global_indices[j];
3127           min_loc = j;
3128           /* search the minimum among nodes not yet touched on the connected component
3129              since there can be more than one constraint on a single cc */
3130           for (j=1;j<size_of_constraint;j++) {
3131             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3132               min_index = row_cmat_global_indices[j];
3133               min_loc = j;
3134             }
3135           }
3136           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3137           constraints_index[n++] = row_cmat_indices[min_loc];
3138         }
3139         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3140       }
3141       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3142       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3143     }
3144   }
3145   *n_constraints = n;
3146   if (constraints_idx) *constraints_idx = constraints_index;
3147   PetscFunctionReturn(0);
3148 }
3149 
3150 #undef __FUNCT__
3151 #define __FUNCT__ "PCBDDCSubsetNumbering"
3152 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3153 {
3154   Vec            local_vec,global_vec;
3155   IS             seqis,paris;
3156   VecScatter     scatter_ctx;
3157   PetscScalar    *array;
3158   PetscInt       *temp_global_dofs;
3159   PetscScalar    globalsum;
3160   PetscInt       i,j,s;
3161   PetscInt       nlocals,first_index,old_index,max_local;
3162   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3163   PetscMPIInt    *dof_sizes,*dof_displs;
3164   PetscBool      first_found;
3165   PetscErrorCode ierr;
3166 
3167   PetscFunctionBegin;
3168   /* mpi buffers */
3169   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3170   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3171   j = ( !rank_prec_comm ? size_prec_comm : 0);
3172   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3173   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3174   /* get maximum size of subset */
3175   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3176   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3177   max_local = 0;
3178   for (i=0;i<n_local_dofs;i++) {
3179     if (max_local < temp_global_dofs[i] ) {
3180       max_local = temp_global_dofs[i];
3181     }
3182   }
3183   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3184   max_global++;
3185   max_local = 0;
3186   for (i=0;i<n_local_dofs;i++) {
3187     if (max_local < local_dofs[i] ) {
3188       max_local = local_dofs[i];
3189     }
3190   }
3191   max_local++;
3192   /* allocate workspace */
3193   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3194   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3195   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3196   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3197   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3198   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3199   /* create scatter */
3200   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3201   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3202   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3203   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3204   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3205   /* init array */
3206   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3207   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3208   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3209   if (local_dofs_mult) {
3210     for (i=0;i<n_local_dofs;i++) {
3211       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3212     }
3213   } else {
3214     for (i=0;i<n_local_dofs;i++) {
3215       array[local_dofs[i]]=1.0;
3216     }
3217   }
3218   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3219   /* scatter into global vec and get total number of global dofs */
3220   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3221   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3222   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3223   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3224   /* Fill global_vec with cumulative function for global numbering */
3225   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3226   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3227   nlocals = 0;
3228   first_index = -1;
3229   first_found = PETSC_FALSE;
3230   for (i=0;i<s;i++) {
3231     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3232       first_found = PETSC_TRUE;
3233       first_index = i;
3234     }
3235     nlocals += (PetscInt)PetscRealPart(array[i]);
3236   }
3237   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3238   if (!rank_prec_comm) {
3239     dof_displs[0]=0;
3240     for (i=1;i<size_prec_comm;i++) {
3241       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3242     }
3243   }
3244   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3245   if (first_found) {
3246     array[first_index] += (PetscScalar)nlocals;
3247     old_index = first_index;
3248     for (i=first_index+1;i<s;i++) {
3249       if (PetscRealPart(array[i]) > 0.1) {
3250         array[i] += array[old_index];
3251         old_index = i;
3252       }
3253     }
3254   }
3255   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3256   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3257   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3258   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3259   /* get global ordering of local dofs */
3260   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3261   if (local_dofs_mult) {
3262     for (i=0;i<n_local_dofs;i++) {
3263       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3264     }
3265   } else {
3266     for (i=0;i<n_local_dofs;i++) {
3267       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3268     }
3269   }
3270   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3271   /* free workspace */
3272   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3273   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3274   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3275   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3276   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3277   /* return pointer to global ordering of local dofs */
3278   *global_numbering_subset = temp_global_dofs;
3279   PetscFunctionReturn(0);
3280 }
3281 
3282 #undef __FUNCT__
3283 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3284 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3285 {
3286   PetscInt       i,j;
3287   PetscScalar    *alphas;
3288   PetscErrorCode ierr;
3289 
3290   PetscFunctionBegin;
3291   /* this implements stabilized Gram-Schmidt */
3292   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3293   for (i=0;i<n;i++) {
3294     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3295     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3296     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3297   }
3298   ierr = PetscFree(alphas);CHKERRQ(ierr);
3299   PetscFunctionReturn(0);
3300 }
3301 
3302 #undef __FUNCT__
3303 #define __FUNCT__ "MatISGetSubassemblingPattern"
3304 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3305 {
3306   Mat             subdomain_adj;
3307   IS              new_ranks,ranks_send_to;
3308   MatPartitioning partitioner;
3309   Mat_IS          *matis;
3310   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3311   PetscInt        prank;
3312   PetscMPIInt     size,rank,color;
3313   PetscInt        *xadj,*adjncy,*oldranks;
3314   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3315   PetscInt        i,local_size,threshold=0;
3316   PetscErrorCode  ierr;
3317   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3318   PetscSubcomm    subcomm;
3319 
3320   PetscFunctionBegin;
3321   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3322   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3323   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3324 
3325   /* Get info on mapping */
3326   matis = (Mat_IS*)(mat->data);
3327   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3328   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3329 
3330   /* build local CSR graph of subdomains' connectivity */
3331   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3332   xadj[0] = 0;
3333   xadj[1] = PetscMax(n_neighs-1,0);
3334   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3335   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3336 
3337   if (threshold) {
3338     PetscInt xadj_count = 0;
3339     for (i=1;i<n_neighs;i++) {
3340       if (n_shared[i] > threshold) {
3341         adjncy[xadj_count] = neighs[i];
3342         adjncy_wgt[xadj_count] = n_shared[i];
3343         xadj_count++;
3344       }
3345     }
3346     xadj[1] = xadj_count;
3347   } else {
3348     if (xadj[1]) {
3349       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3350       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3351     }
3352   }
3353   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3354   if (use_square) {
3355     for (i=0;i<xadj[1];i++) {
3356       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3357     }
3358   }
3359   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3360 
3361   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3362 
3363   /*
3364     Restrict work on active processes only.
3365   */
3366   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3367   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3368   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3369   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3370   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3371   if (color) {
3372     ierr = PetscFree(xadj);CHKERRQ(ierr);
3373     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3374     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3375   } else {
3376     PetscInt coarsening_ratio;
3377     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3378     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3379     prank = rank;
3380     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3381     /*
3382     for (i=0;i<size;i++) {
3383       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3384     }
3385     */
3386     for (i=0;i<xadj[1];i++) {
3387       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3388     }
3389     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3390     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3391     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3392 
3393     /* Partition */
3394     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3395     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3396     if (use_vwgt) {
3397       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3398       v_wgt[0] = local_size;
3399       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3400     }
3401     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3402     coarsening_ratio = size/n_subdomains;
3403     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3404     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3405     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3406     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3407 
3408     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3409     if (contiguous) {
3410       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3411     } else {
3412       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3413     }
3414     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3415     /* clean up */
3416     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3417     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3418     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3419     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3420   }
3421   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3422 
3423   /* assemble parallel IS for sends */
3424   i = 1;
3425   if (color) i=0;
3426   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3427 
3428   /* get back IS */
3429   *is_sends = ranks_send_to;
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3434 
3435 #undef __FUNCT__
3436 #define __FUNCT__ "MatISSubassemble"
3437 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3438 {
3439   Mat                    local_mat;
3440   Mat_IS                 *matis;
3441   IS                     is_sends_internal;
3442   PetscInt               rows,cols,new_local_rows;
3443   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3444   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3445   ISLocalToGlobalMapping l2gmap;
3446   PetscInt*              l2gmap_indices;
3447   const PetscInt*        is_indices;
3448   MatType                new_local_type;
3449   /* buffers */
3450   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3451   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3452   PetscInt               *recv_buffer_idxs_local;
3453   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3454   /* MPI */
3455   MPI_Comm               comm,comm_n;
3456   PetscSubcomm           subcomm;
3457   PetscMPIInt            n_sends,n_recvs,commsize;
3458   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3459   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3460   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3461   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3462   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3463   PetscErrorCode         ierr;
3464 
3465   PetscFunctionBegin;
3466   /* TODO: add missing checks */
3467   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3468   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3469   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3470   PetscValidLogicalCollectiveInt(mat,nis,7);
3471   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3472   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3473   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3474   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3475   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3476   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3477   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3478   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3479     PetscInt mrows,mcols,mnrows,mncols;
3480     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3481     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3482     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3483     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3484     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3485     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3486   }
3487   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3488   PetscValidLogicalCollectiveInt(mat,bs,0);
3489   /* prepare IS for sending if not provided */
3490   if (!is_sends) {
3491     PetscBool pcontig = PETSC_TRUE;
3492     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3493     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3494   } else {
3495     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3496     is_sends_internal = is_sends;
3497   }
3498 
3499   /* get pointer of MATIS data */
3500   matis = (Mat_IS*)mat->data;
3501 
3502   /* get comm */
3503   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3504 
3505   /* compute number of sends */
3506   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3507   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3508 
3509   /* compute number of receives */
3510   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3511   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3512   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3513   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3514   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3515   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3516   ierr = PetscFree(iflags);CHKERRQ(ierr);
3517 
3518   /* restrict comm if requested */
3519   subcomm = 0;
3520   destroy_mat = PETSC_FALSE;
3521   if (restrict_comm) {
3522     PetscMPIInt color,rank,subcommsize;
3523     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3524     color = 0;
3525     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3526     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3527     subcommsize = commsize - subcommsize;
3528     /* check if reuse has been requested */
3529     if (reuse == MAT_REUSE_MATRIX) {
3530       if (*mat_n) {
3531         PetscMPIInt subcommsize2;
3532         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3533         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3534         comm_n = PetscObjectComm((PetscObject)*mat_n);
3535       } else {
3536         comm_n = PETSC_COMM_SELF;
3537       }
3538     } else { /* MAT_INITIAL_MATRIX */
3539       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3540       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3541       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3542       comm_n = subcomm->comm;
3543     }
3544     /* flag to destroy *mat_n if not significative */
3545     if (color) destroy_mat = PETSC_TRUE;
3546   } else {
3547     comm_n = comm;
3548   }
3549 
3550   /* prepare send/receive buffers */
3551   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3552   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3553   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3554   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3555   if (nis) {
3556     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3557   }
3558 
3559   /* Get data from local matrices */
3560   if (!isdense) {
3561     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3562     /* TODO: See below some guidelines on how to prepare the local buffers */
3563     /*
3564        send_buffer_vals should contain the raw values of the local matrix
3565        send_buffer_idxs should contain:
3566        - MatType_PRIVATE type
3567        - PetscInt        size_of_l2gmap
3568        - PetscInt        global_row_indices[size_of_l2gmap]
3569        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3570     */
3571   } else {
3572     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3573     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3574     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3575     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3576     send_buffer_idxs[1] = i;
3577     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3578     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3579     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3580     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3581     for (i=0;i<n_sends;i++) {
3582       ilengths_vals[is_indices[i]] = len*len;
3583       ilengths_idxs[is_indices[i]] = len+2;
3584     }
3585   }
3586   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3587   /* additional is (if any) */
3588   if (nis) {
3589     PetscMPIInt psum;
3590     PetscInt j;
3591     for (j=0,psum=0;j<nis;j++) {
3592       PetscInt plen;
3593       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3594       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3595       psum += len+1; /* indices + lenght */
3596     }
3597     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3598     for (j=0,psum=0;j<nis;j++) {
3599       PetscInt plen;
3600       const PetscInt *is_array_idxs;
3601       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3602       send_buffer_idxs_is[psum] = plen;
3603       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3604       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3605       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3606       psum += plen+1; /* indices + lenght */
3607     }
3608     for (i=0;i<n_sends;i++) {
3609       ilengths_idxs_is[is_indices[i]] = psum;
3610     }
3611     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3612   }
3613 
3614   buf_size_idxs = 0;
3615   buf_size_vals = 0;
3616   buf_size_idxs_is = 0;
3617   for (i=0;i<n_recvs;i++) {
3618     buf_size_idxs += (PetscInt)olengths_idxs[i];
3619     buf_size_vals += (PetscInt)olengths_vals[i];
3620     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3621   }
3622   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3623   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3624   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3625 
3626   /* get new tags for clean communications */
3627   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3628   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3629   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3630 
3631   /* allocate for requests */
3632   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3633   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3634   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3635   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3636   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3637   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3638 
3639   /* communications */
3640   ptr_idxs = recv_buffer_idxs;
3641   ptr_vals = recv_buffer_vals;
3642   ptr_idxs_is = recv_buffer_idxs_is;
3643   for (i=0;i<n_recvs;i++) {
3644     source_dest = onodes[i];
3645     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3646     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3647     ptr_idxs += olengths_idxs[i];
3648     ptr_vals += olengths_vals[i];
3649     if (nis) {
3650       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3651       ptr_idxs_is += olengths_idxs_is[i];
3652     }
3653   }
3654   for (i=0;i<n_sends;i++) {
3655     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3656     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3657     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3658     if (nis) {
3659       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3660     }
3661   }
3662   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3663   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3664 
3665   /* assemble new l2g map */
3666   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3667   ptr_idxs = recv_buffer_idxs;
3668   new_local_rows = 0;
3669   for (i=0;i<n_recvs;i++) {
3670     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3671     ptr_idxs += olengths_idxs[i];
3672   }
3673   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3674   ptr_idxs = recv_buffer_idxs;
3675   new_local_rows = 0;
3676   for (i=0;i<n_recvs;i++) {
3677     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3678     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3679     ptr_idxs += olengths_idxs[i];
3680   }
3681   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3682   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3683   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3684 
3685   /* infer new local matrix type from received local matrices type */
3686   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3687   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3688   if (n_recvs) {
3689     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3690     ptr_idxs = recv_buffer_idxs;
3691     for (i=0;i<n_recvs;i++) {
3692       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3693         new_local_type_private = MATAIJ_PRIVATE;
3694         break;
3695       }
3696       ptr_idxs += olengths_idxs[i];
3697     }
3698     switch (new_local_type_private) {
3699       case MATDENSE_PRIVATE:
3700         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3701           new_local_type = MATSEQAIJ;
3702           bs = 1;
3703         } else { /* if I receive only 1 dense matrix */
3704           new_local_type = MATSEQDENSE;
3705           bs = 1;
3706         }
3707         break;
3708       case MATAIJ_PRIVATE:
3709         new_local_type = MATSEQAIJ;
3710         bs = 1;
3711         break;
3712       case MATBAIJ_PRIVATE:
3713         new_local_type = MATSEQBAIJ;
3714         break;
3715       case MATSBAIJ_PRIVATE:
3716         new_local_type = MATSEQSBAIJ;
3717         break;
3718       default:
3719         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3720         break;
3721     }
3722   } else { /* by default, new_local_type is seqdense */
3723     new_local_type = MATSEQDENSE;
3724     bs = 1;
3725   }
3726 
3727   /* create MATIS object if needed */
3728   if (reuse == MAT_INITIAL_MATRIX) {
3729     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3730     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3731   } else {
3732     /* it also destroys the local matrices */
3733     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3734   }
3735   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3736   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3737 
3738   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3739 
3740   /* Global to local map of received indices */
3741   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3742   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3743   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3744 
3745   /* restore attributes -> type of incoming data and its size */
3746   buf_size_idxs = 0;
3747   for (i=0;i<n_recvs;i++) {
3748     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3749     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3750     buf_size_idxs += (PetscInt)olengths_idxs[i];
3751   }
3752   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3753 
3754   /* set preallocation */
3755   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3756   if (!newisdense) {
3757     PetscInt *new_local_nnz=0;
3758 
3759     ptr_vals = recv_buffer_vals;
3760     ptr_idxs = recv_buffer_idxs_local;
3761     if (n_recvs) {
3762       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3763     }
3764     for (i=0;i<n_recvs;i++) {
3765       PetscInt j;
3766       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3767         for (j=0;j<*(ptr_idxs+1);j++) {
3768           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3769         }
3770       } else {
3771         /* TODO */
3772       }
3773       ptr_idxs += olengths_idxs[i];
3774     }
3775     if (new_local_nnz) {
3776       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3777       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3778       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3779       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3780       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3781       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3782     } else {
3783       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3784     }
3785     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3786   } else {
3787     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3788   }
3789 
3790   /* set values */
3791   ptr_vals = recv_buffer_vals;
3792   ptr_idxs = recv_buffer_idxs_local;
3793   for (i=0;i<n_recvs;i++) {
3794     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3795       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3796       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3797       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3798       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3799       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3800     } else {
3801       /* TODO */
3802     }
3803     ptr_idxs += olengths_idxs[i];
3804     ptr_vals += olengths_vals[i];
3805   }
3806   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3807   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3808   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3809   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3810   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3811   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3812 
3813 #if 0
3814   if (!restrict_comm) { /* check */
3815     Vec       lvec,rvec;
3816     PetscReal infty_error;
3817 
3818     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3819     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3820     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3821     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3822     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3823     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3824     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3825     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3826     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3827   }
3828 #endif
3829 
3830   /* assemble new additional is (if any) */
3831   if (nis) {
3832     PetscInt **temp_idxs,*count_is,j,psum;
3833 
3834     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3835     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3836     ptr_idxs = recv_buffer_idxs_is;
3837     psum = 0;
3838     for (i=0;i<n_recvs;i++) {
3839       for (j=0;j<nis;j++) {
3840         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3841         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3842         psum += plen;
3843         ptr_idxs += plen+1; /* shift pointer to received data */
3844       }
3845     }
3846     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3847     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3848     for (i=1;i<nis;i++) {
3849       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3850     }
3851     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3852     ptr_idxs = recv_buffer_idxs_is;
3853     for (i=0;i<n_recvs;i++) {
3854       for (j=0;j<nis;j++) {
3855         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3856         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3857         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3858         ptr_idxs += plen+1; /* shift pointer to received data */
3859       }
3860     }
3861     for (i=0;i<nis;i++) {
3862       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3863       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3864       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3865     }
3866     ierr = PetscFree(count_is);CHKERRQ(ierr);
3867     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3868     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3869   }
3870   /* free workspace */
3871   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3872   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3873   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3874   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3875   if (isdense) {
3876     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3877     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3878   } else {
3879     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3880   }
3881   if (nis) {
3882     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3883     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3884   }
3885   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3886   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3887   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3888   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3889   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3890   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3891   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3892   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3893   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3894   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3895   ierr = PetscFree(onodes);CHKERRQ(ierr);
3896   if (nis) {
3897     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3898     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3899     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3900   }
3901   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3902   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3903     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3904     for (i=0;i<nis;i++) {
3905       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3906     }
3907   }
3908   PetscFunctionReturn(0);
3909 }
3910 
3911 /* temporary hack into ksp private data structure */
3912 #include <petsc-private/kspimpl.h>
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3916 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3917 {
3918   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3919   PC_IS                  *pcis = (PC_IS*)pc->data;
3920   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3921   MatNullSpace           CoarseNullSpace=NULL;
3922   ISLocalToGlobalMapping coarse_islg;
3923   IS                     coarse_is,*isarray;
3924   PetscInt               i,im_active=-1,active_procs=-1;
3925   PetscInt               nis,nisdofs,nisneu;
3926   PC                     pc_temp;
3927   PCType                 coarse_pc_type;
3928   KSPType                coarse_ksp_type;
3929   PetscBool              multilevel_requested,multilevel_allowed;
3930   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3931   Mat                    t_coarse_mat_is;
3932   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3933   PetscMPIInt            all_procs;
3934   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3935   PetscBool              compute_vecs = PETSC_FALSE;
3936   PetscScalar            *array;
3937   PetscErrorCode         ierr;
3938 
3939   PetscFunctionBegin;
3940   /* Assign global numbering to coarse dofs */
3941   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3942     compute_vecs = PETSC_TRUE;
3943     PetscInt ocoarse_size;
3944     ocoarse_size = pcbddc->coarse_size;
3945     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3946     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3947     /* see if we can avoid some work */
3948     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3949       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3950         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3951         coarse_reuse = PETSC_FALSE;
3952       } else { /* we can safely reuse already computed coarse matrix */
3953         coarse_reuse = PETSC_TRUE;
3954       }
3955     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3956       coarse_reuse = PETSC_FALSE;
3957     }
3958     /* reset any subassembling information */
3959     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3960     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3961   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3962     coarse_reuse = PETSC_TRUE;
3963   }
3964 
3965   /* count "active" (i.e. with positive local size) and "void" processes */
3966   im_active = !!(pcis->n);
3967   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3968   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3969   void_procs = all_procs-active_procs;
3970   csin_type_simple = PETSC_TRUE;
3971   redist = PETSC_FALSE;
3972   if (pcbddc->current_level && void_procs) {
3973     csin_ml = PETSC_TRUE;
3974     ncoarse_ml = void_procs;
3975     csin_ds = PETSC_TRUE;
3976     ncoarse_ds = void_procs;
3977   } else {
3978     csin_ml = PETSC_FALSE;
3979     ncoarse_ml = all_procs;
3980     if (void_procs) {
3981       csin_ds = PETSC_TRUE;
3982       ncoarse_ds = void_procs;
3983       csin_type_simple = PETSC_FALSE;
3984     } else {
3985       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3986         csin_ds = PETSC_TRUE;
3987         ncoarse_ds = pcbddc->redistribute_coarse;
3988         redist = PETSC_TRUE;
3989       } else {
3990         csin_ds = PETSC_FALSE;
3991         ncoarse_ds = all_procs;
3992       }
3993     }
3994   }
3995 
3996   /*
3997     test if we can go multilevel: three conditions must be satisfied:
3998     - we have not exceeded the number of levels requested
3999     - we can actually subassemble the active processes
4000     - we can find a suitable number of MPI processes where we can place the subassembled problem
4001   */
4002   multilevel_allowed = PETSC_FALSE;
4003   multilevel_requested = PETSC_FALSE;
4004   if (pcbddc->current_level < pcbddc->max_levels) {
4005     multilevel_requested = PETSC_TRUE;
4006     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4007       multilevel_allowed = PETSC_FALSE;
4008     } else {
4009       multilevel_allowed = PETSC_TRUE;
4010     }
4011   }
4012   /* determine number of process partecipating to coarse solver */
4013   if (multilevel_allowed) {
4014     ncoarse = ncoarse_ml;
4015     csin = csin_ml;
4016   } else {
4017     ncoarse = ncoarse_ds;
4018     csin = csin_ds;
4019   }
4020 
4021   /* creates temporary l2gmap and IS for coarse indexes */
4022   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4023   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4024 
4025   /* creates temporary MATIS object for coarse matrix */
4026   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4027   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4028   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4029   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4030 #if 0
4031   {
4032     PetscViewer viewer;
4033     char filename[256];
4034     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4035     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4036     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4037     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4038     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4039   }
4040 #endif
4041   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4042   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4043   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4045   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4046 
4047   /* compute dofs splitting and neumann boundaries for coarse dofs */
4048   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4049     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4050     const PetscInt         *idxs;
4051     ISLocalToGlobalMapping tmap;
4052 
4053     /* create map between primal indices (in local representative ordering) and local primal numbering */
4054     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4055     /* allocate space for temporary storage */
4056     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4057     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4058     /* allocate for IS array */
4059     nisdofs = pcbddc->n_ISForDofsLocal;
4060     nisneu = !!pcbddc->NeumannBoundariesLocal;
4061     nis = nisdofs + nisneu;
4062     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4063     /* dofs splitting */
4064     for (i=0;i<nisdofs;i++) {
4065       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4066       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4067       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4068       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4069       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4070       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4071       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4072       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4073     }
4074     /* neumann boundaries */
4075     if (pcbddc->NeumannBoundariesLocal) {
4076       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4077       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4078       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4079       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4080       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4081       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4082       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4083       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4084     }
4085     /* free memory */
4086     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4087     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4088     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4089   } else {
4090     nis = 0;
4091     nisdofs = 0;
4092     nisneu = 0;
4093     isarray = NULL;
4094   }
4095   /* destroy no longer needed map */
4096   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4097 
4098   /* restrict on coarse candidates (if needed) */
4099   coarse_mat_is = NULL;
4100   if (csin) {
4101     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4102       if (redist) {
4103         PetscMPIInt rank;
4104         PetscInt spc,n_spc_p1,dest[1];
4105 
4106         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4107         spc = all_procs/pcbddc->redistribute_coarse;
4108         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4109         if (rank > n_spc_p1*(spc+1)-1) {
4110           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4111         } else {
4112           dest[0] = rank/(spc+1);
4113         }
4114         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4115       } else {
4116         PetscInt j,tissize,*nisindices;
4117         PetscInt *coarse_candidates;
4118         const PetscInt* tisindices;
4119         /* get coarse candidates' ranks in pc communicator */
4120         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4121         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4122         for (i=0,j=0;i<all_procs;i++) {
4123           if (!coarse_candidates[i]) {
4124             coarse_candidates[j]=i;
4125             j++;
4126           }
4127         }
4128         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4129         /* get a suitable subassembling pattern */
4130         if (csin_type_simple) {
4131           PetscMPIInt rank;
4132           PetscInt    issize,isidx;
4133           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4134           if (im_active) {
4135             issize = 1;
4136             isidx = (PetscInt)rank;
4137           } else {
4138             issize = 0;
4139             isidx = -1;
4140           }
4141           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4142         } else {
4143           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4144         }
4145         if (pcbddc->dbg_flag) {
4146           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4147           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4148           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4149           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4150           for (i=0;i<j;i++) {
4151             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4152           }
4153           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4154           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4155         }
4156         /* shift the pattern on coarse candidates */
4157         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4158         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4159         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4160         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4161         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4162         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4163         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4164       }
4165     }
4166     if (pcbddc->dbg_flag) {
4167       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4168       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4169       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4170       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4171     }
4172     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4173     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4174   } else {
4175     if (pcbddc->dbg_flag) {
4176       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4177       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4178       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4179     }
4180     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4181     coarse_mat_is = t_coarse_mat_is;
4182   }
4183 
4184   /* create local to global scatters for coarse problem */
4185   if (compute_vecs) {
4186     PetscInt lrows;
4187     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4188     if (coarse_mat_is) {
4189       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4190     } else {
4191       lrows = 0;
4192     }
4193     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4194     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4195     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4196     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4197     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4198   }
4199   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4200   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4201 
4202   /* set defaults for coarse KSP and PC */
4203   if (multilevel_allowed) {
4204     coarse_ksp_type = KSPRICHARDSON;
4205     coarse_pc_type = PCBDDC;
4206   } else {
4207     coarse_ksp_type = KSPPREONLY;
4208     coarse_pc_type = PCREDUNDANT;
4209   }
4210 
4211   /* print some info if requested */
4212   if (pcbddc->dbg_flag) {
4213     if (!multilevel_allowed) {
4214       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4215       if (multilevel_requested) {
4216         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4217       } else if (pcbddc->max_levels) {
4218         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4219       }
4220       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4221     }
4222   }
4223 
4224   /* create the coarse KSP object only once with defaults */
4225   if (coarse_mat_is) {
4226     MatReuse coarse_mat_reuse;
4227     PetscViewer dbg_viewer = NULL;
4228     if (pcbddc->dbg_flag) {
4229       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4230       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4231     }
4232     if (!pcbddc->coarse_ksp) {
4233       char prefix[256],str_level[16];
4234       size_t len;
4235       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4236       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4237       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4238       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4239       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4240       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4241       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4242       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4243       /* prefix */
4244       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4245       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4246       if (!pcbddc->current_level) {
4247         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4248         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4249       } else {
4250         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4251         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4252         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4253         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4254         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4255         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4256       }
4257       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4258       /* allow user customization */
4259       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4260       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4261     }
4262 
4263     /* get some info after set from options */
4264     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4265     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4266     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4267     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4268     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4269       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4270       isbddc = PETSC_FALSE;
4271     }
4272     if (isredundant) {
4273       KSP inner_ksp;
4274       PC inner_pc;
4275       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4276       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4277       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4278     }
4279 
4280     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4281     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4282     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4283     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4284     if (nisdofs) {
4285       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4286       for (i=0;i<nisdofs;i++) {
4287         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4288       }
4289     }
4290     if (nisneu) {
4291       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4292       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4293     }
4294 
4295     /* assemble coarse matrix */
4296     if (coarse_reuse) {
4297       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4298       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4299       coarse_mat_reuse = MAT_REUSE_MATRIX;
4300     } else {
4301       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4302     }
4303     if (isbddc || isnn) {
4304       if (pcbddc->coarsening_ratio > 1) {
4305         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4306           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4307           if (pcbddc->dbg_flag) {
4308             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4309             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4310             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4311             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4312           }
4313         }
4314         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4315       } else {
4316         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4317         coarse_mat = coarse_mat_is;
4318       }
4319     } else {
4320       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4321     }
4322     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4323 
4324     /* propagate symmetry info to coarse matrix */
4325     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4326     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4327 
4328     /* set operators */
4329     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4330     if (pcbddc->dbg_flag) {
4331       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4332     }
4333   } else { /* processes non partecipating to coarse solver (if any) */
4334     coarse_mat = 0;
4335   }
4336   ierr = PetscFree(isarray);CHKERRQ(ierr);
4337 #if 0
4338   {
4339     PetscViewer viewer;
4340     char filename[256];
4341     sprintf(filename,"coarse_mat.m");
4342     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4343     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4344     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4345     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4346   }
4347 #endif
4348 
4349   /* Compute coarse null space (special handling by BDDC only) */
4350   if (pcbddc->NullSpace) {
4351     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4352   }
4353 
4354   if (pcbddc->coarse_ksp) {
4355     Vec crhs,csol;
4356     PetscBool ispreonly;
4357     if (CoarseNullSpace) {
4358       if (isbddc) {
4359         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4360       } else {
4361         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4362       }
4363     }
4364     /* setup coarse ksp */
4365     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4366     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4367     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4368     /* hack */
4369     if (!csol) {
4370       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4371     }
4372     if (!crhs) {
4373       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4374     }
4375     /* Check coarse problem if in debug mode or if solving with an iterative method */
4376     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4377     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4378       KSP       check_ksp;
4379       KSPType   check_ksp_type;
4380       PC        check_pc;
4381       Vec       check_vec,coarse_vec;
4382       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4383       PetscInt  its;
4384       PetscBool compute_eigs;
4385       PetscReal *eigs_r,*eigs_c;
4386       PetscInt  neigs;
4387       const char *prefix;
4388 
4389       /* Create ksp object suitable for estimation of extreme eigenvalues */
4390       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4391       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4392       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4393       if (ispreonly) {
4394         check_ksp_type = KSPPREONLY;
4395         compute_eigs = PETSC_FALSE;
4396       } else {
4397         check_ksp_type = KSPGMRES;
4398         compute_eigs = PETSC_TRUE;
4399       }
4400       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4401       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4402       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4403       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4404       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4405       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4406       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4407       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4408       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4409       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4410       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4411       /* create random vec */
4412       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4413       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4414       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4415       if (CoarseNullSpace) {
4416         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4417       }
4418       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4419       /* solve coarse problem */
4420       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4421       if (CoarseNullSpace) {
4422         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4423       }
4424       /* set eigenvalue estimation if preonly has not been requested */
4425       if (compute_eigs) {
4426         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4427         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4428         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4429         lambda_max = eigs_r[neigs-1];
4430         lambda_min = eigs_r[0];
4431         if (pcbddc->use_coarse_estimates) {
4432           if (lambda_max>lambda_min) {
4433             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4434             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4435           }
4436         }
4437       }
4438 
4439       /* check coarse problem residual error */
4440       if (pcbddc->dbg_flag) {
4441         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4442         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4443         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4444         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4445         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4446         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4447         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4448         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4449         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4450         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4451         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4452         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4453         if (compute_eigs) {
4454           PetscReal lambda_max_s,lambda_min_s;
4455           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4456           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4457           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4458           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4459           for (i=0;i<neigs;i++) {
4460             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4461           }
4462         }
4463         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4464         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4465       }
4466       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4467       if (compute_eigs) {
4468         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4469         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4470       }
4471     }
4472   }
4473   /* print additional info */
4474   if (pcbddc->dbg_flag) {
4475     /* waits until all processes reaches this point */
4476     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4477     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4478     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4479   }
4480 
4481   /* free memory */
4482   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4483   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4489 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4490 {
4491   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4492   PC_IS*         pcis = (PC_IS*)pc->data;
4493   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4494   PetscInt       i,coarse_size;
4495   PetscInt       *local_primal_indices;
4496   PetscErrorCode ierr;
4497 
4498   PetscFunctionBegin;
4499   /* Compute global number of coarse dofs */
4500   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4501     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4502   }
4503   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4504 
4505   /* check numbering */
4506   if (pcbddc->dbg_flag) {
4507     PetscScalar coarsesum,*array;
4508     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4509 
4510     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4511     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4512     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4513     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4514     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4515     for (i=0;i<pcbddc->local_primal_size;i++) {
4516       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4517     }
4518     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4519     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4520     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4521     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4522     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4523     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4524     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4525     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4526     for (i=0;i<pcis->n;i++) {
4527       if (array[i] == 1.0) {
4528         set_error = PETSC_TRUE;
4529         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4530       }
4531     }
4532     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4533     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4534     for (i=0;i<pcis->n;i++) {
4535       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4536     }
4537     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4538     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4539     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4540     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4541     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4542     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4543     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4544       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4545       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4546       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4547       for (i=0;i<pcbddc->local_primal_size;i++) {
4548         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4549       }
4550       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4551     }
4552     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4553     if (set_error_reduced) {
4554       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4555     }
4556   }
4557   /* get back data */
4558   *coarse_size_n = coarse_size;
4559   *local_primal_indices_n = local_primal_indices;
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 #undef __FUNCT__
4564 #define __FUNCT__ "PCBDDCGlobalToLocal"
4565 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4566 {
4567   IS             localis_t;
4568   PetscInt       i,lsize,*idxs,n;
4569   PetscScalar    *vals;
4570   PetscErrorCode ierr;
4571 
4572   PetscFunctionBegin;
4573   /* get indices in local ordering exploiting local to global map */
4574   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4575   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4576   for (i=0;i<lsize;i++) vals[i] = 1.0;
4577   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4578   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4579   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4580   if (idxs) { /* multilevel guard */
4581     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4582   }
4583   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4584   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4585   ierr = PetscFree(vals);CHKERRQ(ierr);
4586   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4587   /* now compute set in local ordering */
4588   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4589   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4590   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4591   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4592   for (i=0,lsize=0;i<n;i++) {
4593     if (PetscRealPart(vals[i]) > 0.5) {
4594       lsize++;
4595     }
4596   }
4597   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4598   for (i=0,lsize=0;i<n;i++) {
4599     if (PetscRealPart(vals[i]) > 0.5) {
4600       idxs[lsize++] = i;
4601     }
4602   }
4603   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4604   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4605   *localis = localis_t;
4606   PetscFunctionReturn(0);
4607 }
4608 
4609 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4610 #undef __FUNCT__
4611 #define __FUNCT__ "PCBDDCMatMult_Private"
4612 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4613 {
4614   PCBDDCChange_ctx change_ctx;
4615   PetscErrorCode   ierr;
4616 
4617   PetscFunctionBegin;
4618   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4619   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4620   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4621   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4622   PetscFunctionReturn(0);
4623 }
4624 
4625 #undef __FUNCT__
4626 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4627 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4628 {
4629   PCBDDCChange_ctx change_ctx;
4630   PetscErrorCode   ierr;
4631 
4632   PetscFunctionBegin;
4633   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4634   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4635   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4636   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4637   PetscFunctionReturn(0);
4638 }
4639 
4640 #undef __FUNCT__
4641 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4642 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4643 {
4644   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4645   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4646   PetscInt            *used_xadj,*used_adjncy;
4647   PetscBool           free_used_adj;
4648   PetscErrorCode      ierr;
4649 
4650   PetscFunctionBegin;
4651   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4652   free_used_adj = PETSC_FALSE;
4653   if (pcbddc->sub_schurs_layers == -1) {
4654     used_xadj = NULL;
4655     used_adjncy = NULL;
4656   } else {
4657     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4658       used_xadj = pcbddc->mat_graph->xadj;
4659       used_adjncy = pcbddc->mat_graph->adjncy;
4660     } else if (pcbddc->computed_rowadj) {
4661       used_xadj = pcbddc->mat_graph->xadj;
4662       used_adjncy = pcbddc->mat_graph->adjncy;
4663     } else {
4664       Mat            mat_adj;
4665       PetscBool      flg_row=PETSC_TRUE;
4666       const PetscInt *xadj,*adjncy;
4667       PetscInt       nvtxs;
4668 
4669       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4670       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4671       if (!flg_row) {
4672         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4673       }
4674       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4675       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4676       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4677       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4678       if (!flg_row) {
4679         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4680       }
4681       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4682       free_used_adj = PETSC_TRUE;
4683     }
4684   }
4685   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4686 
4687   /* free adjacency */
4688   if (free_used_adj) {
4689     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4690   }
4691   PetscFunctionReturn(0);
4692 }
4693 
4694 #undef __FUNCT__
4695 #define __FUNCT__ "PCBDDCInitSubSchurs"
4696 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4697 {
4698   PC_IS               *pcis=(PC_IS*)pc->data;
4699   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4700   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4701   PCBDDCGraph         graph;
4702   Mat                 S_j;
4703   PetscErrorCode      ierr;
4704 
4705   PetscFunctionBegin;
4706   /* attach interface graph for determining subsets */
4707   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4708     IS verticesIS;
4709 
4710     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4711     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4712     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4713     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4714     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4715     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4716 /*
4717     if (pcbddc->dbg_flag) {
4718       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4719     }
4720 */
4721   } else {
4722     graph = pcbddc->mat_graph;
4723   }
4724 
4725   /* Create Schur complement matrix */
4726   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4727   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4728 
4729   /* sub_schurs init */
4730   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4731   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4732   /* free graph struct */
4733   if (pcbddc->sub_schurs_rebuild) {
4734     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4735   }
4736   PetscFunctionReturn(0);
4737 }
4738