xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a58a30b4bf9d00545e20033ff3f2527e68041eaf)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       n = PetscMax(n,nmin);
66       cum += subset_size*n;
67       cum2 += n;
68       maxneigs = PetscMax(maxneigs,n);
69     }
70   }
71 
72   if (mss) {
73     if (symmetric) {
74       PetscBLASInt B_itype = 1;
75       PetscBLASInt B_N = mss;
76       PetscScalar  zero = 0.0;
77       PetscScalar  eps = 0.0; /* dlamch? */
78 
79       B_lwork = -1;
80       S = NULL;
81       St = NULL;
82       eigs = NULL;
83       eigv = NULL;
84       B_iwork = NULL;
85       B_ifail = NULL;
86       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
87 #if defined(PETSC_USE_COMPLEX)
88       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
89 #else
90       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
91 #endif
92       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
93       ierr = PetscFPTrapPop();CHKERRQ(ierr);
94     } else {
95         /* TODO */
96     }
97   } else {
98     lwork = 0;
99   }
100 
101   nv = 0;
102   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
103     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
104   }
105   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
106   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
107                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
108 #if defined(PETSC_USE_COMPLEX)
109   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
110 #endif
111   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
112   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
113                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
114                       nv+cum,&pcbddc->adaptive_constraints_idxs,
115                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
116   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
117 
118   maxneigs = 0;
119   cum = cum2 = cumarray = 0;
120   if (sub_schurs->is_Ej_com) {
121     const PetscInt *idxs;
122 
123     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
124     for (cum=0;cum<nv;cum++) {
125       pcbddc->adaptive_constraints_n[cum] = 1;
126       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
127       pcbddc->adaptive_constraints_ptrs[cum] = cum;
128       pcbddc->adaptive_constraints_data[cum] = 1.0;
129     }
130     cum2 = cum;
131     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
132   }
133 
134   if (mss) { /* multilevel */
135     if (pcbddc->use_deluxe_scaling) {
136       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
137     } else {
138       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
139     }
140     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
141   }
142 
143   cumsubs = 0;
144   for (i=0;i<sub_schurs->n_subs;i++) {
145     PetscInt j,subset_size;
146 
147     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
148     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
149       const PetscInt *idxs;
150       PetscScalar    one = 1.0,zero=0.0;
151       PetscBLASInt   B_N;
152 
153       /* S should be copied since we need it for deluxe scaling */
154       if (symmetric) {
155         PetscInt j;
156         for (j=0;j<subset_size;j++) {
157           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
158         }
159         for (j=0;j<subset_size;j++) {
160           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
161         }
162       } else {
163         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
165       }
166       /* is always this the right matrix? */
167       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
168 
169       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
170       /* St = Starray+cumarray; */
171 
172       if (symmetric) {
173         PetscBLASInt B_itype = 1;
174         PetscBLASInt B_IL = 1, B_IU;
175         PetscScalar  eps = -1.0; /* dlamch? */
176 
177         /* ask for eigenvalues lower than thresh */
178         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
179         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
180         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
181 #if defined(PETSC_USE_COMPLEX)
182         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
183 #else
184         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
185 #endif
186         ierr = PetscFPTrapPop();CHKERRQ(ierr);
187         if (B_ierr) {
188           if (B_ierr < 0 ) {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
190           } else if (B_ierr <= B_N) {
191             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
192           } else {
193             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
194           }
195         }
196 
197         if (B_neigs > nmax) {
198           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax);
199           B_neigs = nmax;
200         }
201 
202         if (B_neigs < nmin) {
203           PetscBLASInt B_neigs2;
204 
205           B_IL = B_neigs + 1;
206           ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr);
207           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
208           if (symmetric) {
209             PetscInt j;
210             for (j=0;j<subset_size;j++) {
211               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
212             }
213             for (j=0;j<subset_size;j++) {
214               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
215             }
216           } else {
217             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
219           }
220           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
221 #if defined(PETSC_USE_COMPLEX)
222           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
223 #else
224           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
225 #endif
226           ierr = PetscFPTrapPop();CHKERRQ(ierr);
227           B_neigs += B_neigs2;
228         }
229         if (B_ierr) {
230           if (B_ierr < 0 ) {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
232           } else if (B_ierr <= B_N) {
233             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
234           } else {
235             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
236           }
237         }
238         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
239         for (j=0;j<B_neigs;j++) {
240           if (eigs[j] == 0.0) {
241             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
242           } else {
243             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
244           }
245         }
246       } else {
247           /* TODO */
248       }
249       maxneigs = PetscMax(B_neigs,maxneigs);
250       pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs;
251       cumsubs++;
252 
253       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
254       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
255       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
256       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
257       for (j=0;j<B_neigs;j++) {
258 #if 0
259         {
260           PetscBLASInt Blas_N,Blas_one = 1.0;
261           PetscScalar norm;
262           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
263           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
264           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
265             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
266           } else {
267             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
268           }
269           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
270         }
271 #endif
272         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
273         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
274         cum2 += subset_size;
275       }
276       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
277     }
278     /* shift for next computation */
279     cumarray += subset_size*subset_size;
280   }
281   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
282   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
283 
284   if (mss) {
285     if (pcbddc->use_deluxe_scaling) {
286       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
287     } else {
288       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
289     }
290     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
291   }
292   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
293 #if defined(PETSC_USE_COMPLEX)
294   ierr = PetscFree(rwork);CHKERRQ(ierr);
295 #endif
296   if (pcbddc->dbg_flag) {
297     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
298     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "PCBDDCSetUpSolvers"
305 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
306 {
307   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
308   PetscScalar    *coarse_submat_vals;
309   PetscErrorCode ierr;
310 
311   PetscFunctionBegin;
312   /* Setup local scatters R_to_B and (optionally) R_to_D */
313   /* PCBDDCSetUpLocalWorkVectors should be called first! */
314   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
315 
316   /* Setup local neumann solver ksp_R */
317   /* PCBDDCSetUpLocalScatters should be called first! */
318   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
319 
320   /* Change global null space passed in by the user if change of basis has been requested */
321   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
322     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
323   }
324 
325   /*
326      Setup local correction and local part of coarse basis.
327      Gives back the dense local part of the coarse matrix in column major ordering
328   */
329   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
330 
331   /* Compute total number of coarse nodes and setup coarse solver */
332   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
333 
334   /* free */
335   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "PCBDDCResetCustomization"
341 PetscErrorCode PCBDDCResetCustomization(PC pc)
342 {
343   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
344   PetscErrorCode ierr;
345 
346   PetscFunctionBegin;
347   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
348   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
349   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
350   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
351   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
352   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
353   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
354   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
355   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
356   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
357   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "PCBDDCResetTopography"
363 PetscErrorCode PCBDDCResetTopography(PC pc)
364 {
365   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
366   PetscErrorCode ierr;
367 
368   PetscFunctionBegin;
369   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
370   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
371   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
372   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
373   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "PCBDDCResetSolvers"
379 PetscErrorCode PCBDDCResetSolvers(PC pc)
380 {
381   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
382   PetscScalar    *array;
383   PetscErrorCode ierr;
384 
385   PetscFunctionBegin;
386   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
387   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
388   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
389   ierr = PetscFree(array);CHKERRQ(ierr);
390   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
391   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
392   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
393   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
394   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
395   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
396   if (pcbddc->local_auxmat2) {
397     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
398     ierr = PetscFree(array);CHKERRQ(ierr);
399   }
400   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
402   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
403   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
404   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
405   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
406   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
407   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
408   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
409   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
410   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
411   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
412   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
413   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
414   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
415   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
421 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
422 {
423   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
424   PC_IS          *pcis = (PC_IS*)pc->data;
425   VecType        impVecType;
426   PetscInt       n_constraints,n_R,old_size;
427   PetscErrorCode ierr;
428 
429   PetscFunctionBegin;
430   if (!pcbddc->ConstraintMatrix) {
431     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
432   }
433   /* get sizes */
434   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
435   n_R = pcis->n-pcbddc->n_actual_vertices;
436   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
437   /* local work vectors (try to avoid unneeded work)*/
438   /* R nodes */
439   old_size = -1;
440   if (pcbddc->vec1_R) {
441     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
442   }
443   if (n_R != old_size) {
444     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
445     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
446     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
447     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
448     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
449     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
450   }
451   /* local primal dofs */
452   old_size = -1;
453   if (pcbddc->vec1_P) {
454     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
455   }
456   if (pcbddc->local_primal_size != old_size) {
457     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
458     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
459     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
460     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
461   }
462   /* local explicit constraints */
463   old_size = -1;
464   if (pcbddc->vec1_C) {
465     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
466   }
467   if (n_constraints && n_constraints != old_size) {
468     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
469     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
470     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
471     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "PCBDDCSetUpCorrection"
478 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
479 {
480   PetscErrorCode         ierr;
481   /* pointers to pcis and pcbddc */
482   PC_IS*                 pcis = (PC_IS*)pc->data;
483   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
484   /* submatrices of local problem */
485   Mat                    A_RV,A_VR,A_VV;
486   /* submatrices of local coarse problem */
487   Mat                    S_VV,S_CV,S_VC,S_CC;
488   /* working matrices */
489   Mat                    C_CR;
490   /* additional working stuff */
491   PC                     pc_R;
492   Mat                    F;
493   PetscBool              isLU,isCHOL,isILU;
494 
495   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
496   PetscScalar            *work;
497   PetscInt               *idx_V_B;
498   PetscInt               n,n_vertices,n_constraints;
499   PetscInt               i,n_R,n_D,n_B;
500   PetscBool              unsymmetric_check;
501   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
502   MatType                impMatType;
503   /* some shortcuts to scalars */
504   PetscScalar            one=1.0,m_one=-1.0;
505 
506   PetscFunctionBegin;
507   /* get number of vertices (corners plus constraints with change of basis)
508      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
509   n_vertices = pcbddc->n_actual_vertices;
510   n_constraints = pcbddc->local_primal_size-n_vertices;
511   /* Set Non-overlapping dimensions */
512   n_B = pcis->n_B; n_D = pcis->n - n_B;
513   n_R = pcis->n-n_vertices;
514 
515   /* Set types for local objects needed by BDDC precondtioner */
516   impMatType = MATSEQDENSE;
517 
518   /* vertices in boundary numbering */
519   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
520   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
521   if (i != n_vertices) {
522     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
523   }
524 
525   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
526   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
527   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
528   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
529   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
530   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
531   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
532   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
533   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
534   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
535 
536   unsymmetric_check = PETSC_FALSE;
537   /* allocate workspace */
538   n = 0;
539   if (n_constraints) {
540     n += n_R*n_constraints;
541   }
542   if (n_vertices) {
543     n = PetscMax(2*n_R*n_vertices,n);
544   }
545   if (!pcbddc->issym) {
546     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
547     unsymmetric_check = PETSC_TRUE;
548   }
549   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
550 
551   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
552   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
553   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
554   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
555   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
556   if (isLU || isILU || isCHOL) {
557     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
558   } else {
559     F = NULL;
560   }
561 
562   /* Precompute stuffs needed for preprocessing and application of BDDC*/
563   if (n_constraints) {
564     Mat M1,M2,M3;
565     IS  is_aux;
566     /* see if we can save some allocations */
567     if (pcbddc->local_auxmat2) {
568       PetscInt on_R,on_constraints;
569       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
570       if (on_R != n_R || on_constraints != n_constraints) {
571         PetscScalar *marray;
572 
573         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
574         ierr = PetscFree(marray);CHKERRQ(ierr);
575         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
576         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
577       }
578     }
579     /* auxiliary matrices */
580     if (!pcbddc->local_auxmat2) {
581       PetscScalar *marray;
582 
583       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
584       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
585       marray += n_R*n_constraints;
586       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
587     }
588 
589     /* Extract constraints on R nodes: C_{CR}  */
590     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
591     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
592     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
593 
594     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
595     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
596     for (i=0;i<n_constraints;i++) {
597       const PetscScalar *row_cmat_values;
598       const PetscInt    *row_cmat_indices;
599       PetscInt          size_of_constraint,j;
600 
601       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
602       for (j=0;j<size_of_constraint;j++) {
603         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
604       }
605       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
606     }
607     if (F) {
608       Mat B;
609 
610       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
611       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
612       ierr = MatDestroy(&B);CHKERRQ(ierr);
613     } else {
614       PetscScalar *xarray;
615       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
616       for (i=0;i<n_constraints;i++) {
617         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
618         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
619         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
620         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
621         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
622       }
623       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
624     }
625 
626     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
627     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
628     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
629     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
630     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
631     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
632     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
633     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
634     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
635     ierr = MatDestroy(&M2);CHKERRQ(ierr);
636     ierr = MatDestroy(&M3);CHKERRQ(ierr);
637     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
638     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
639     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
640     ierr = MatDestroy(&M1);CHKERRQ(ierr);
641   }
642 
643   /* Get submatrices from subdomain matrix */
644   if (n_vertices) {
645     Mat       newmat;
646     IS        is_aux;
647     PetscInt  ibs,mbs;
648     PetscBool issbaij;
649 
650     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
651     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
652     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
653     if (ibs != mbs) { /* need to convert to SEQAIJ */
654       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
655       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
656       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
657       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
658       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
659     } else {
660       /* this is safe */
661       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
662       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
663       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
664         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
665         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
666         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
667         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
668       } else {
669         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
670         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
671       }
672     }
673     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
674   }
675 
676   /* Matrix of coarse basis functions (local) */
677   if (pcbddc->coarse_phi_B) {
678     PetscInt on_B,on_primal,on_D=n_D;
679     if (pcbddc->coarse_phi_D) {
680       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
681     }
682     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
683     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
684       PetscScalar *marray;
685 
686       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
687       ierr = PetscFree(marray);CHKERRQ(ierr);
688       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
689       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
690       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
691       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
692     }
693   }
694 
695   if (!pcbddc->coarse_phi_B) {
696     PetscScalar *marray;
697 
698     n = n_B*pcbddc->local_primal_size;
699     if (pcbddc->switch_static || pcbddc->dbg_flag) {
700       n += n_D*pcbddc->local_primal_size;
701     }
702     if (!pcbddc->issym) {
703       n *= 2;
704     }
705     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
706     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
707     n = n_B*pcbddc->local_primal_size;
708     if (pcbddc->switch_static || pcbddc->dbg_flag) {
709       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
710       n += n_D*pcbddc->local_primal_size;
711     }
712     if (!pcbddc->issym) {
713       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
714       if (pcbddc->switch_static) {
715         n = n_B*pcbddc->local_primal_size;
716         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
717       }
718     } else {
719       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
720       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
721       if (pcbddc->switch_static) {
722         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
723         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
724       }
725     }
726   }
727   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
728   /* vertices */
729   if (n_vertices) {
730     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
731     PetscScalar  *x,*y;
732     PetscBLASInt B_N,B_one = 1;
733 
734     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
735     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
736     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
737     if (F) {
738       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
739     } else {
740       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
741       for (i=0;i<n_vertices;i++) {
742         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
743         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
744         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
745         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
746         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
747       }
748       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
749     }
750     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
751     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
752     if (n_constraints) {
753       Mat B;
754       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
755       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
756       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
757       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
758       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
759       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
760       ierr = MatDestroy(&B);CHKERRQ(ierr);
761     }
762     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
763     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
764     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
765     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
766     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
767     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
768     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
769     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
770     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
771     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
772     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
773     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
774     /* coarse basis functions */
775     for (i=0;i<n_vertices;i++) {
776       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
777       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
778       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
779       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
780       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
781       y[n_B*i+idx_V_B[i]] = 1.0;
782       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
783       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
784 
785       if (pcbddc->switch_static || pcbddc->dbg_flag) {
786         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
787         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
788         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
790         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
791         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
792       }
793       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
794     }
795     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
796     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
797   }
798 
799   if (n_constraints) {
800     Mat B;
801 
802     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
803     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
804     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
805     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
806     if (n_vertices) {
807       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
808     }
809     ierr = MatDestroy(&B);CHKERRQ(ierr);
810     /* coarse basis functions */
811     for (i=0;i<n_constraints;i++) {
812       PetscScalar *y;
813 
814       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
815       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
816       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
817       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
818       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
819       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
820       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
821 
822       if (pcbddc->switch_static || pcbddc->dbg_flag) {
823         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
824         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
825         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
828         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
829       }
830       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
831     }
832   }
833 
834   /* compute other basis functions for non-symmetric problems */
835   if (!pcbddc->issym) {
836     Mat B,X;
837 
838     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
839 
840     if (n_constraints) {
841       Mat S_VCT,S_CCT,B_C;
842 
843       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
844       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
845       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
846       if (n_vertices) {
847         Mat B_V;
848 
849         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
850         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
851         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
852         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
853       }
854       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
855     }
856     if (n_vertices) {
857       Mat          A_VRT;
858       PetscBLASInt B_N,B_one = 1;
859 
860       if (!n_constraints) { /* if there are no constraints, reset work */
861         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
862       }
863       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
864       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
865       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
866       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
867     }
868 
869     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
870     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
871       for (i=0;i<pcbddc->local_primal_size;i++) {
872         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
873         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
874         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
875         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
876         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
877       }
878     } else {
879       for (i=0;i<pcbddc->local_primal_size;i++) {
880         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
881         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
882         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
883         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
884         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
885       }
886     }
887     ierr = MatDestroy(&B);CHKERRQ(ierr);
888     /* coarse basis functions */
889     for (i=0;i<pcbddc->local_primal_size;i++) {
890       PetscScalar *y;
891 
892       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
893       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
894       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
895       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897       if (i<n_vertices) {
898         y[n_B*i+idx_V_B[i]] = 1.0;
899       }
900       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
901       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
902 
903       if (pcbddc->switch_static || pcbddc->dbg_flag) {
904         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
905         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
906         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
909         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
910       }
911       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
912     }
913     ierr = MatDestroy(&X);CHKERRQ(ierr);
914   }
915 
916   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
917   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
918   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
919   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
920   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
921   /* Checking coarse_sub_mat and coarse basis functios */
922   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
923   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
924   if (pcbddc->dbg_flag) {
925     Mat         coarse_sub_mat;
926     Mat         AUXMAT,TM1,TM2,TM3,TM4;
927     Mat         coarse_phi_D,coarse_phi_B;
928     Mat         coarse_psi_D,coarse_psi_B;
929     Mat         A_II,A_BB,A_IB,A_BI;
930     MatType     checkmattype=MATSEQAIJ;
931     PetscReal   real_value;
932 
933     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
934     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
935     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
936     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
937     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
938     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
939     if (unsymmetric_check) {
940       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
941       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
942     }
943     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
944 
945     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
946     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
947     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
948     if (unsymmetric_check) {
949       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
950       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
951       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
952       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
953       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
954       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
955       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
956       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
957       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
958       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
959       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
960       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
961     } else {
962       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
963       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
964       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
965       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
966       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
967       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
968       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
969       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
970     }
971     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
972     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
973     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
974     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
975     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
976     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
977 
978     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
979     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
980     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
981     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
982     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
983     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
984     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
985     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
986     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
987     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
988     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
989     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
990     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
991     if (unsymmetric_check) {
992       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
993       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
994     }
995     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
996   }
997 
998   /* free memory */
999   ierr = PetscFree(work);CHKERRQ(ierr);
1000   if (n_vertices) {
1001     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1002   }
1003   if (n_constraints) {
1004     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1005   }
1006   /* get back data */
1007   *coarse_submat_vals_n = coarse_submat_vals;
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1013 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1014 {
1015   Mat            *work_mat;
1016   IS             isrow_s,iscol_s;
1017   PetscBool      rsorted,csorted;
1018   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1023   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1024   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1025   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1026 
1027   if (!rsorted) {
1028     const PetscInt *idxs;
1029     PetscInt *idxs_sorted,i;
1030 
1031     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1032     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1033     for (i=0;i<rsize;i++) {
1034       idxs_perm_r[i] = i;
1035     }
1036     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1037     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1038     for (i=0;i<rsize;i++) {
1039       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1040     }
1041     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1042     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1043   } else {
1044     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1045     isrow_s = isrow;
1046   }
1047 
1048   if (!csorted) {
1049     if (isrow == iscol) {
1050       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1051       iscol_s = isrow_s;
1052     } else {
1053       const PetscInt *idxs;
1054       PetscInt *idxs_sorted,i;
1055 
1056       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1057       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1058       for (i=0;i<csize;i++) {
1059         idxs_perm_c[i] = i;
1060       }
1061       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1062       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1063       for (i=0;i<csize;i++) {
1064         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1065       }
1066       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1067       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1068     }
1069   } else {
1070     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1071     iscol_s = iscol;
1072   }
1073 
1074   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1075 
1076   if (!rsorted || !csorted) {
1077     Mat      new_mat;
1078     IS       is_perm_r,is_perm_c;
1079 
1080     if (!rsorted) {
1081       PetscInt *idxs_r,i;
1082       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1083       for (i=0;i<rsize;i++) {
1084         idxs_r[idxs_perm_r[i]] = i;
1085       }
1086       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1087       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1088     } else {
1089       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1090     }
1091     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1092 
1093     if (!csorted) {
1094       if (isrow_s == iscol_s) {
1095         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1096         is_perm_c = is_perm_r;
1097       } else {
1098         PetscInt *idxs_c,i;
1099         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1100         for (i=0;i<csize;i++) {
1101           idxs_c[idxs_perm_c[i]] = i;
1102         }
1103         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1104         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1105       }
1106     } else {
1107       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1108     }
1109     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1110 
1111     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1112     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1113     work_mat[0] = new_mat;
1114     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1115     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1116   }
1117 
1118   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1119   *B = work_mat[0];
1120   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1121   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1122   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1128 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1129 {
1130   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1131   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1132   Mat            new_mat;
1133   IS             is_local,is_global;
1134   PetscInt       local_size;
1135   PetscBool      isseqaij;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1140   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1141   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1142   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1143   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1144   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1145   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1146 
1147   /* check */
1148   if (pcbddc->dbg_flag) {
1149     Vec       x,x_change;
1150     PetscReal error;
1151 
1152     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1153     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1154     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1155     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1158     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1161     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1162     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1163     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1164     ierr = VecDestroy(&x);CHKERRQ(ierr);
1165     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1166   }
1167 
1168   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1169   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1170   if (isseqaij) {
1171     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1172   } else {
1173     Mat work_mat;
1174     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1175     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1176     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1177   }
1178   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1179   /*
1180   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1181   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1182   */
1183   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1189 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1190 {
1191   PC_IS*         pcis = (PC_IS*)(pc->data);
1192   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1193   IS             is_aux1,is_aux2;
1194   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1195   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1196   PetscInt       vbs,bs;
1197   PetscBT        bitmask;
1198   PetscErrorCode ierr;
1199 
1200   PetscFunctionBegin;
1201   /*
1202     No need to setup local scatters if
1203       - primal space is unchanged
1204         AND
1205       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1206         AND
1207       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1208   */
1209   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1210     PetscFunctionReturn(0);
1211   }
1212   /* destroy old objects */
1213   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1214   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1215   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1216   /* Set Non-overlapping dimensions */
1217   n_B = pcis->n_B; n_D = pcis->n - n_B;
1218   n_vertices = pcbddc->n_actual_vertices;
1219   /* create auxiliary bitmask */
1220   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1221   for (i=0;i<n_vertices;i++) {
1222     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1223   }
1224 
1225   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1226   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1227   for (i=0, n_R=0; i<pcis->n; i++) {
1228     if (!PetscBTLookup(bitmask,i)) {
1229       idx_R_local[n_R] = i;
1230       n_R++;
1231     }
1232   }
1233 
1234   /* Block code */
1235   vbs = 1;
1236   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1237   if (bs>1 && !(n_vertices%bs)) {
1238     PetscBool is_blocked = PETSC_TRUE;
1239     PetscInt  *vary;
1240     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1241     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1242     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1243     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1244     for (i=0; i<n_vertices; i++) {
1245       if (vary[i]!=0 && vary[i]!=bs) {
1246         is_blocked = PETSC_FALSE;
1247         break;
1248       }
1249     }
1250     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1251       vbs = bs;
1252       for (i=0;i<n_R/vbs;i++) {
1253         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1254       }
1255     }
1256     ierr = PetscFree(vary);CHKERRQ(ierr);
1257   }
1258   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1259   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1260 
1261   /* print some info if requested */
1262   if (pcbddc->dbg_flag) {
1263     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1264     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1267     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1268     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1269     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1270     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1271   }
1272 
1273   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1274   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1275   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1276   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1277   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1278   for (i=0; i<n_D; i++) {
1279     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1280   }
1281   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1282   for (i=0, j=0; i<n_R; i++) {
1283     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1284       aux_array1[j++] = i;
1285     }
1286   }
1287   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1288   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1289   for (i=0, j=0; i<n_B; i++) {
1290     if (!PetscBTLookup(bitmask,is_indices[i])) {
1291       aux_array2[j++] = i;
1292     }
1293   }
1294   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1295   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1296   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1297   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1298   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1299 
1300   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1301     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1302     for (i=0, j=0; i<n_R; i++) {
1303       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1304         aux_array1[j++] = i;
1305       }
1306     }
1307     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1308     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1309     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1310   }
1311   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1312   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1319 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1320 {
1321   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1322   PC_IS          *pcis = (PC_IS*)pc->data;
1323   PC             pc_temp;
1324   Mat            A_RR;
1325   MatReuse       reuse;
1326   PetscScalar    m_one = -1.0;
1327   PetscReal      value;
1328   PetscInt       n_D,n_R,ibs,mbs;
1329   PetscBool      use_exact,use_exact_reduced,issbaij;
1330   PetscErrorCode ierr;
1331   /* prefixes stuff */
1332   char           dir_prefix[256],neu_prefix[256],str_level[16];
1333   size_t         len;
1334 
1335   PetscFunctionBegin;
1336 
1337   /* compute prefixes */
1338   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1339   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1340   if (!pcbddc->current_level) {
1341     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1342     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1343     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1344     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1345   } else {
1346     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1347     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1348     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1349     len -= 15; /* remove "pc_bddc_coarse_" */
1350     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1351     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1352     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1353     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1354     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1355     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1356     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1357     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1358   }
1359 
1360   /* DIRICHLET PROBLEM */
1361   if (dirichlet) {
1362     /* Matrix for Dirichlet problem is pcis->A_II */
1363     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1364     if (!pcbddc->ksp_D) { /* create object if not yet build */
1365       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1366       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1367       /* default */
1368       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1369       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1370       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1371       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1372       if (issbaij) {
1373         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1374       } else {
1375         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1376       }
1377       /* Allow user's customization */
1378       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1379       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1380     }
1381     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1382     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1383     if (!n_D) {
1384       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1385       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1386     }
1387     /* Set Up KSP for Dirichlet problem of BDDC */
1388     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1389     /* set ksp_D into pcis data */
1390     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1391     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1392     pcis->ksp_D = pcbddc->ksp_D;
1393   }
1394 
1395   /* NEUMANN PROBLEM */
1396   A_RR = 0;
1397   if (neumann) {
1398     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1399     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1400     if (pcbddc->ksp_R) { /* already created ksp */
1401       PetscInt nn_R;
1402       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1403       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1404       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1405       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1406         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1407         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1408         reuse = MAT_INITIAL_MATRIX;
1409       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1410         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1411           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1412           reuse = MAT_INITIAL_MATRIX;
1413         } else { /* safe to reuse the matrix */
1414           reuse = MAT_REUSE_MATRIX;
1415         }
1416       }
1417       /* last check */
1418       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1419         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1420         reuse = MAT_INITIAL_MATRIX;
1421       }
1422     } else { /* first time, so we need to create the matrix */
1423       reuse = MAT_INITIAL_MATRIX;
1424     }
1425     /* extract A_RR */
1426     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1427     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1428     if (ibs != mbs) {
1429       Mat newmat;
1430       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1431       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1432       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1433     } else {
1434       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1435     }
1436     if (!pcbddc->ksp_R) { /* create object if not present */
1437       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1438       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1439       /* default */
1440       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1441       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1442       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1443       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1444       if (issbaij) {
1445         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1446       } else {
1447         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1448       }
1449       /* Allow user's customization */
1450       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1451       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1452     }
1453     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1454     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1455     if (!n_R) {
1456       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1457       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1458     }
1459     /* Set Up KSP for Neumann problem of BDDC */
1460     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1461   }
1462 
1463   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1464   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1465     if (pcbddc->dbg_flag) {
1466       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1468       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1469     }
1470     if (dirichlet) { /* Dirichlet */
1471       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1472       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1473       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1474       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1475       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1476       /* need to be adapted? */
1477       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1478       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1479       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1480       /* print info */
1481       if (pcbddc->dbg_flag) {
1482         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1483         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1484       }
1485       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1486         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1487       }
1488     }
1489     if (neumann) { /* Neumann */
1490       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1491       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1492       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1493       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1494       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1495       /* need to be adapted? */
1496       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1497       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1498       /* print info */
1499       if (pcbddc->dbg_flag) {
1500         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1501         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1502       }
1503       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1504         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1505       }
1506     }
1507   }
1508   /* free Neumann problem's matrix */
1509   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1515 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1516 {
1517   PetscErrorCode ierr;
1518   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1519 
1520   PetscFunctionBegin;
1521   if (applytranspose) {
1522     if (pcbddc->local_auxmat1) {
1523       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1524       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1527   } else {
1528     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1529     if (pcbddc->local_auxmat1) {
1530       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1531       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1532     }
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1538 #undef __FUNCT__
1539 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1540 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1541 {
1542   PetscErrorCode ierr;
1543   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1544   PC_IS*            pcis = (PC_IS*)  (pc->data);
1545   const PetscScalar zero = 0.0;
1546 
1547   PetscFunctionBegin;
1548   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1549   if (applytranspose) {
1550     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1551     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1552   } else {
1553     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1554     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1555   }
1556   /* start communications from local primal nodes to rhs of coarse solver */
1557   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1558   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1560 
1561   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1562   /* TODO remove null space when doing multilevel */
1563   if (pcbddc->coarse_ksp) {
1564     if (applytranspose) {
1565       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1566     } else {
1567       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1568     }
1569   }
1570 
1571   /* Local solution on R nodes */
1572   if (pcis->n) {
1573     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1574     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1575     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1576     if (pcbddc->switch_static) {
1577       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1578       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1579     }
1580     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1581     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1582     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1584     if (pcbddc->switch_static) {
1585       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1586       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1587     }
1588   }
1589 
1590   /* communications from coarse sol to local primal nodes */
1591   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1592   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1593 
1594   /* Sum contributions from two levels */
1595   if (applytranspose) {
1596     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1597     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1598   } else {
1599     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1600     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1606 #undef __FUNCT__
1607 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1608 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1609 {
1610   PetscErrorCode ierr;
1611   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1612   PetscScalar    *array,*array2;
1613   Vec            from,to;
1614 
1615   PetscFunctionBegin;
1616   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1617     from = pcbddc->coarse_vec;
1618     to = pcbddc->vec1_P;
1619     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1620       Vec tvec;
1621       PetscInt lsize;
1622       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1623       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1624       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1625       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1626       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1627       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1628       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1629     }
1630   } else { /* from local to global -> put data in coarse right hand side */
1631     from = pcbddc->vec1_P;
1632     to = pcbddc->coarse_vec;
1633   }
1634   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1640 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1641 {
1642   PetscErrorCode ierr;
1643   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1644   PetscScalar    *array,*array2;
1645   Vec            from,to;
1646 
1647   PetscFunctionBegin;
1648   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1649     from = pcbddc->coarse_vec;
1650     to = pcbddc->vec1_P;
1651   } else { /* from local to global -> put data in coarse right hand side */
1652     from = pcbddc->vec1_P;
1653     to = pcbddc->coarse_vec;
1654   }
1655   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1656   if (smode == SCATTER_FORWARD) {
1657     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1658       Vec tvec;
1659       PetscInt lsize;
1660       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1661       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1662       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1663       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1664       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1665       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1666       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1667     }
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 /* uncomment for testing purposes */
1673 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1674 #undef __FUNCT__
1675 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1676 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1677 {
1678   PetscErrorCode    ierr;
1679   PC_IS*            pcis = (PC_IS*)(pc->data);
1680   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1681   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1682   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1683   MatType           impMatType=MATSEQAIJ;
1684   /* one and zero */
1685   PetscScalar       one=1.0,zero=0.0;
1686   /* space to store constraints and their local indices */
1687   PetscScalar       *temp_quadrature_constraint;
1688   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1689   /* iterators */
1690   PetscInt          i,j,k,total_counts,temp_start_ptr;
1691   /* stuff to store connected components stored in pcbddc->mat_graph */
1692   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1693   PetscInt          n_ISForFaces,n_ISForEdges;
1694   /* near null space stuff */
1695   MatNullSpace      nearnullsp;
1696   const Vec         *nearnullvecs;
1697   Vec               *localnearnullsp;
1698   PetscBool         nnsp_has_cnst;
1699   PetscInt          nnsp_size;
1700   PetscScalar       *array;
1701   /* BLAS integers */
1702   PetscBLASInt      lwork,lierr;
1703   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1704   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1705   /* LAPACK working arrays for SVD or POD */
1706   PetscBool         skip_lapack;
1707   PetscScalar       *work;
1708   PetscReal         *singular_vals;
1709 #if defined(PETSC_USE_COMPLEX)
1710   PetscReal         *rwork;
1711 #endif
1712 #if defined(PETSC_MISSING_LAPACK_GESVD)
1713   PetscBLASInt      Blas_one_2=1;
1714   PetscScalar       *temp_basis,*correlation_mat;
1715 #else
1716   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1717   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1718 #endif
1719   /* reuse */
1720   PetscInt          olocal_primal_size;
1721   PetscInt          *oprimal_indices_local_idxs;
1722   /* change of basis */
1723   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1724   PetscBool         boolforchange,qr_needed;
1725   PetscBT           touched,change_basis,qr_needed_idx;
1726   /* auxiliary stuff */
1727   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1728   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1729   PetscScalar       *temp_quadrature_constraint_work=NULL;
1730   /* some quantities */
1731   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1732   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1733 
1734 
1735   PetscFunctionBegin;
1736   /* Destroy Mat objects computed previously */
1737   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1738   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1739   /* TODO synch with adaptive selection */
1740   /* Get index sets for faces, edges and vertices from graph */
1741   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1742   /* free unneeded index sets */
1743   if (!pcbddc->use_vertices) {
1744     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1745   }
1746   if (!pcbddc->use_edges) {
1747     for (i=0;i<n_ISForEdges;i++) {
1748       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1749     }
1750     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1751     n_ISForEdges = 0;
1752   }
1753   if (!pcbddc->use_faces) {
1754     for (i=0;i<n_ISForFaces;i++) {
1755       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1756     }
1757     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1758     n_ISForFaces = 0;
1759   }
1760   /* HACKS (the following two blocks of code) */
1761   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1762     pcbddc->use_change_of_basis = PETSC_TRUE;
1763     if (!ISForEdges) {
1764       pcbddc->use_change_on_faces = PETSC_TRUE;
1765     }
1766   }
1767   if (pcbddc->NullSpace) {
1768     /* use_change_of_basis should be consistent among processors */
1769     PetscBool tbool[2],gbool[2];
1770     tbool [0] = pcbddc->use_change_of_basis;
1771     tbool [1] = pcbddc->use_change_on_faces;
1772     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1773     pcbddc->use_change_of_basis = gbool[0];
1774     pcbddc->use_change_on_faces = gbool[1];
1775   }
1776   /* print some info */
1777   if (pcbddc->dbg_flag) {
1778     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1779     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1780     i = 0;
1781     if (ISForVertices) {
1782       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1783     }
1784     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1785     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1786     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1787     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1788   }
1789 
1790   if (!pcbddc->adaptive_selection) {
1791     /* check if near null space is attached to global mat */
1792     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1793     if (nearnullsp) {
1794       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1795       /* remove any stored info */
1796       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1797       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1798       /* store information for BDDC solver reuse */
1799       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1800       pcbddc->onearnullspace = nearnullsp;
1801       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1802       for (i=0;i<nnsp_size;i++) {
1803         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1804       }
1805     } else { /* if near null space is not provided BDDC uses constants by default */
1806       nnsp_size = 0;
1807       nnsp_has_cnst = PETSC_TRUE;
1808     }
1809     /* get max number of constraints on a single cc */
1810     max_constraints = nnsp_size;
1811     if (nnsp_has_cnst) max_constraints++;
1812 
1813     /*
1814          Evaluate maximum storage size needed by the procedure
1815          - temp_indices will contain start index of each constraint stored as follows
1816          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1817          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1818          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1819                                                                                                                                                            */
1820     total_counts = n_ISForFaces+n_ISForEdges;
1821     total_counts *= max_constraints;
1822     n_vertices = 0;
1823     if (ISForVertices) {
1824       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1825     }
1826     total_counts += n_vertices;
1827     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1828     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1829     total_counts = 0;
1830     max_size_of_constraint = 0;
1831     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1832       if (i<n_ISForEdges) {
1833         used_IS = &ISForEdges[i];
1834       } else {
1835         used_IS = &ISForFaces[i-n_ISForEdges];
1836       }
1837       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1838       total_counts += j;
1839       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1840     }
1841     total_counts *= max_constraints;
1842     total_counts += n_vertices;
1843     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1844     /* get local part of global near null space vectors */
1845     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1846     for (k=0;k<nnsp_size;k++) {
1847       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1848       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1849       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1850     }
1851 
1852     /* whether or not to skip lapack calls */
1853     skip_lapack = PETSC_TRUE;
1854     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1855 
1856     /* allocate some auxiliary stuff */
1857     if (!skip_lapack || pcbddc->use_qr_single) {
1858       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1859     }
1860 
1861     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1862     if (!skip_lapack) {
1863       PetscScalar temp_work;
1864 
1865 #if defined(PETSC_MISSING_LAPACK_GESVD)
1866       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1867       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1868       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1869       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1870 #if defined(PETSC_USE_COMPLEX)
1871       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1872 #endif
1873       /* now we evaluate the optimal workspace using query with lwork=-1 */
1874       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1875       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1876       lwork = -1;
1877       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1878 #if !defined(PETSC_USE_COMPLEX)
1879       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1880 #else
1881       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1882 #endif
1883       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1884       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1885 #else /* on missing GESVD */
1886       /* SVD */
1887       PetscInt max_n,min_n;
1888       max_n = max_size_of_constraint;
1889       min_n = max_constraints;
1890       if (max_size_of_constraint < max_constraints) {
1891         min_n = max_size_of_constraint;
1892         max_n = max_constraints;
1893       }
1894       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1895 #if defined(PETSC_USE_COMPLEX)
1896       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1897 #endif
1898       /* now we evaluate the optimal workspace using query with lwork=-1 */
1899       lwork = -1;
1900       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1901       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1902       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1903       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1904 #if !defined(PETSC_USE_COMPLEX)
1905       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1906 #else
1907       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1908 #endif
1909       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1910       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1911 #endif /* on missing GESVD */
1912       /* Allocate optimal workspace */
1913       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1914       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1915     }
1916     /* Now we can loop on constraining sets */
1917     total_counts = 0;
1918     temp_indices[0] = 0;
1919     /* vertices */
1920     if (ISForVertices) {
1921       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1922       if (nnsp_has_cnst) { /* consider all vertices */
1923         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1924         for (i=0;i<n_vertices;i++) {
1925           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1926           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1927           total_counts++;
1928         }
1929       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1930         PetscBool used_vertex;
1931         for (i=0;i<n_vertices;i++) {
1932           used_vertex = PETSC_FALSE;
1933           k = 0;
1934           while (!used_vertex && k<nnsp_size) {
1935             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1936             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1937               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1938               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1939               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1940               total_counts++;
1941               used_vertex = PETSC_TRUE;
1942             }
1943             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1944             k++;
1945           }
1946         }
1947       }
1948       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1949       n_vertices = total_counts;
1950     }
1951 
1952     /* edges and faces */
1953     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1954       if (ncc<n_ISForEdges) {
1955         used_IS = &ISForEdges[ncc];
1956         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1957       } else {
1958         used_IS = &ISForFaces[ncc-n_ISForEdges];
1959         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1960       }
1961       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1962       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1963       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1964       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1965       /* change of basis should not be performed on local periodic nodes */
1966       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1967       if (nnsp_has_cnst) {
1968         PetscScalar quad_value;
1969         temp_constraints++;
1970         if (!pcbddc->use_nnsp_true) {
1971           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1972         } else {
1973           quad_value = 1.0;
1974         }
1975         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1976         for (j=0;j<size_of_constraint;j++) {
1977           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1978         }
1979         /* sort by global ordering if using lapack subroutines (not needed!) */
1980         if (!skip_lapack || pcbddc->use_qr_single) {
1981           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1982           for (j=0;j<size_of_constraint;j++) {
1983             permutation[j]=j;
1984           }
1985           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1986           for (j=0;j<size_of_constraint;j++) {
1987             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1988           }
1989           for (j=0;j<size_of_constraint;j++) {
1990             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1991             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1992           }
1993           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1994           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1995         }
1996         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1997         total_counts++;
1998       }
1999       for (k=0;k<nnsp_size;k++) {
2000         PetscReal real_value;
2001         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2002         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2003         for (j=0;j<size_of_constraint;j++) {
2004           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2005         }
2006         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2007         /* check if array is null on the connected component */
2008         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2009         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2010         if (real_value > 0.0) { /* keep indices and values */
2011           /* sort by global ordering if using lapack subroutines */
2012           if (!skip_lapack || pcbddc->use_qr_single) {
2013             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2014             for (j=0;j<size_of_constraint;j++) {
2015               permutation[j]=j;
2016             }
2017             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2018             for (j=0;j<size_of_constraint;j++) {
2019               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2020               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2021             }
2022             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2023             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2024           }
2025           temp_constraints++;
2026           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2027           total_counts++;
2028         }
2029       }
2030       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2031       valid_constraints = temp_constraints;
2032       if (!pcbddc->use_nnsp_true && temp_constraints) {
2033         if (temp_constraints == 1) { /* just normalize the constraint */
2034           PetscScalar norm;
2035           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2036           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2037           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2038           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2039         } else { /* perform SVD */
2040           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2041 
2042 #if defined(PETSC_MISSING_LAPACK_GESVD)
2043           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2044              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2045              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2046                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2047                 from that computed using LAPACKgesvd
2048              -> This is due to a different computation of eigenvectors in LAPACKheev
2049              -> The quality of the POD-computed basis will be the same */
2050           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2051           /* Store upper triangular part of correlation matrix */
2052           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2053           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2054           for (j=0;j<temp_constraints;j++) {
2055             for (k=0;k<j+1;k++) {
2056               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2057             }
2058           }
2059           /* compute eigenvalues and eigenvectors of correlation matrix */
2060           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2061           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2062 #if !defined(PETSC_USE_COMPLEX)
2063           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2064 #else
2065           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2066 #endif
2067           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2068           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2069           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2070           j = 0;
2071           while (j < temp_constraints && singular_vals[j] < tol) j++;
2072           total_counts = total_counts-j;
2073           valid_constraints = temp_constraints-j;
2074           /* scale and copy POD basis into used quadrature memory */
2075           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2077           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2078           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2080           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2081           if (j<temp_constraints) {
2082             PetscInt ii;
2083             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2084             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2085             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2086             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2087             for (k=0;k<temp_constraints-j;k++) {
2088               for (ii=0;ii<size_of_constraint;ii++) {
2089                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2090               }
2091             }
2092           }
2093 #else  /* on missing GESVD */
2094           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2096           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2097           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2098 #if !defined(PETSC_USE_COMPLEX)
2099           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2100 #else
2101           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2102 #endif
2103           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2104           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2105           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2106           k = temp_constraints;
2107           if (k > size_of_constraint) k = size_of_constraint;
2108           j = 0;
2109           while (j < k && singular_vals[k-j-1] < tol) j++;
2110           valid_constraints = k-j;
2111           total_counts = total_counts-temp_constraints+valid_constraints;
2112 #endif /* on missing GESVD */
2113         }
2114       }
2115       /* setting change_of_basis flag is safe now */
2116       if (boolforchange) {
2117         for (j=0;j<valid_constraints;j++) {
2118           PetscBTSet(change_basis,total_counts-j-1);
2119         }
2120       }
2121     }
2122     /* free workspace */
2123     if (!skip_lapack || pcbddc->use_qr_single) {
2124       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2125     }
2126     if (!skip_lapack) {
2127       ierr = PetscFree(work);CHKERRQ(ierr);
2128 #if defined(PETSC_USE_COMPLEX)
2129       ierr = PetscFree(rwork);CHKERRQ(ierr);
2130 #endif
2131       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2132 #if defined(PETSC_MISSING_LAPACK_GESVD)
2133       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2134       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2135 #endif
2136     }
2137     for (k=0;k<nnsp_size;k++) {
2138       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2139     }
2140     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2141   } else {
2142     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2143     PetscInt        cum = 0;
2144 
2145     total_counts = 0;
2146     n_vertices = 0;
2147     if (sub_schurs->is_Ej_com) {
2148       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2149     }
2150     max_constraints = 0;
2151     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2152       total_counts += pcbddc->adaptive_constraints_n[i];
2153       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2154     }
2155     temp_indices = pcbddc->adaptive_constraints_ptrs;
2156     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2157     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2158 
2159 #if 0
2160     printf("Found %d totals\n",total_counts);
2161     for (i=0;i<total_counts;i++) {
2162       printf("const %d, start %d",i,temp_indices[i]);
2163       printf(" end %d:\n",temp_indices[i+1]);
2164       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2165         printf("  idxs %d",temp_indices_to_constraint[j]);
2166         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2167       }
2168     }
2169 #endif
2170 
2171     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2172     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2173     /* Change of basis */
2174     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2175     if (pcbddc->use_change_of_basis) {
2176       cum = n_vertices;
2177       for (i=0;i<sub_schurs->n_subs;i++) {
2178         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2179           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2180             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2181           }
2182         }
2183         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2184       }
2185     }
2186   }
2187 
2188   /* free index sets of faces, edges and vertices */
2189   for (i=0;i<n_ISForFaces;i++) {
2190     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2191   }
2192   if (n_ISForFaces) {
2193     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2194   }
2195   for (i=0;i<n_ISForEdges;i++) {
2196     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2197   }
2198   if (n_ISForEdges) {
2199     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2200   }
2201   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2202 
2203   /* map temp_indices_to_constraint in boundary numbering */
2204   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2205   if (i != temp_indices[total_counts]) {
2206     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2207   }
2208 
2209   /* set quantities in pcbddc data structure and store previous primal size */
2210   /* n_vertices defines the number of subdomain corners in the primal space */
2211   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2212   olocal_primal_size = pcbddc->local_primal_size;
2213   pcbddc->local_primal_size = total_counts;
2214   pcbddc->n_vertices = n_vertices;
2215   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2216 
2217   /* Create constraint matrix */
2218   /* The constraint matrix is used to compute the l2g map of primal dofs */
2219   /* so we need to set it up properly either with or without change of basis */
2220   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2221   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2222   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2223   /* array to compute a local numbering of constraints : vertices first then constraints */
2224   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2225   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2226   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2227   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2228   /* auxiliary stuff for basis change */
2229   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2230   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2231 
2232   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2233   total_primal_vertices=0;
2234   for (i=0;i<pcbddc->local_primal_size;i++) {
2235     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2236     if (size_of_constraint == 1) {
2237       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2238       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2239       aux_primal_minloc[total_primal_vertices]=0;
2240       total_primal_vertices++;
2241     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2242       PetscInt min_loc,min_index;
2243       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2244       /* find first untouched local node */
2245       k = 0;
2246       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2247       min_index = global_indices[k];
2248       min_loc = k;
2249       /* search the minimum among global nodes already untouched on the cc */
2250       for (k=1;k<size_of_constraint;k++) {
2251         /* there can be more than one constraint on a single connected component */
2252         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2253           min_index = global_indices[k];
2254           min_loc = k;
2255         }
2256       }
2257       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2258       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2259       aux_primal_minloc[total_primal_vertices]=min_loc;
2260       total_primal_vertices++;
2261     }
2262   }
2263   /* determine if a QR strategy is needed for change of basis */
2264   qr_needed = PETSC_FALSE;
2265   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2266   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2267     if (PetscBTLookup(change_basis,i)) {
2268       if (!pcbddc->use_qr_single) {
2269         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2270         j = 0;
2271         for (k=0;k<size_of_constraint;k++) {
2272           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2273             j++;
2274           }
2275         }
2276         /* found more than one primal dof on the cc */
2277         if (j > 1) {
2278           PetscBTSet(qr_needed_idx,i);
2279           qr_needed = PETSC_TRUE;
2280         }
2281       } else {
2282         PetscBTSet(qr_needed_idx,i);
2283         qr_needed = PETSC_TRUE;
2284       }
2285     }
2286   }
2287   /* free workspace */
2288   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2289 
2290   /* permute indices in order to have a sorted set of vertices */
2291   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2292 
2293   /* nonzero structure of constraint matrix */
2294   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2295   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2296   j=total_primal_vertices;
2297   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2298     if (!PetscBTLookup(change_basis,i)) {
2299       nnz[j]=temp_indices[i+1]-temp_indices[i];
2300       j++;
2301     }
2302   }
2303   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2304   ierr = PetscFree(nnz);CHKERRQ(ierr);
2305   /* set values in constraint matrix */
2306   for (i=0;i<total_primal_vertices;i++) {
2307     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2308   }
2309   total_counts = total_primal_vertices;
2310   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2311     if (!PetscBTLookup(change_basis,i)) {
2312       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2313       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2314       total_counts++;
2315     }
2316   }
2317   /* assembling */
2318   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2319   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2320   /*
2321   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2322   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2323   */
2324   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2325   if (pcbddc->use_change_of_basis) {
2326     /* dual and primal dofs on a single cc */
2327     PetscInt     dual_dofs,primal_dofs;
2328     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2329     PetscInt     primal_counter;
2330     /* working stuff for GEQRF */
2331     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2332     PetscBLASInt lqr_work;
2333     /* working stuff for UNGQR */
2334     PetscScalar  *gqr_work,lgqr_work_t;
2335     PetscBLASInt lgqr_work;
2336     /* working stuff for TRTRS */
2337     PetscScalar  *trs_rhs;
2338     PetscBLASInt Blas_NRHS;
2339     /* pointers for values insertion into change of basis matrix */
2340     PetscInt     *start_rows,*start_cols;
2341     PetscScalar  *start_vals;
2342     /* working stuff for values insertion */
2343     PetscBT      is_primal;
2344     /* matrix sizes */
2345     PetscInt     global_size,local_size;
2346     /* temporary change of basis */
2347     Mat          localChangeOfBasisMatrix;
2348 
2349     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2350     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2351     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2352     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2353 
2354     /* nonzeros for local mat */
2355     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2356     for (i=0;i<pcis->n;i++) nnz[i]=1;
2357     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2358       if (PetscBTLookup(change_basis,i)) {
2359         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2360         if (PetscBTLookup(qr_needed_idx,i)) {
2361           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2362         } else {
2363           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2364           /* get local primal index on the cc */
2365           j = 0;
2366           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2367           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2368         }
2369       }
2370     }
2371     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2372     ierr = PetscFree(nnz);CHKERRQ(ierr);
2373     /* Set initial identity in the matrix */
2374     for (i=0;i<pcis->n;i++) {
2375       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2376     }
2377 
2378     if (pcbddc->dbg_flag) {
2379       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2380       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2381     }
2382 
2383 
2384     /* Now we loop on the constraints which need a change of basis */
2385     /*
2386        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2387        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2388 
2389        Basic blocks of change of basis matrix T computed by
2390 
2391           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2392 
2393             | 1        0   ...        0         s_1/S |
2394             | 0        1   ...        0         s_2/S |
2395             |              ...                        |
2396             | 0        ...            1     s_{n-1}/S |
2397             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2398 
2399             with S = \sum_{i=1}^n s_i^2
2400             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2401                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2402 
2403           - QR decomposition of constraints otherwise
2404     */
2405     if (qr_needed) {
2406       /* space to store Q */
2407       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2408       /* first we issue queries for optimal work */
2409       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2410       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2411       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2412       lqr_work = -1;
2413       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2414       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2415       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2416       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2417       lgqr_work = -1;
2418       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2419       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2420       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2421       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2422       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2423       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2424       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2425       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2426       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2427       /* array to store scaling factors for reflectors */
2428       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2429       /* array to store rhs and solution of triangular solver */
2430       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2431       /* allocating workspace for check */
2432       if (pcbddc->dbg_flag) {
2433         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2434       }
2435     }
2436     /* array to store whether a node is primal or not */
2437     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2438     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2439     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2440     if (i != total_primal_vertices) {
2441       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2442     }
2443     for (i=0;i<total_primal_vertices;i++) {
2444       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2445     }
2446     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2447 
2448     /* loop on constraints and see whether or not they need a change of basis and compute it */
2449     /* -> using implicit ordering contained in temp_indices data */
2450     total_counts = pcbddc->n_vertices;
2451     primal_counter = total_counts;
2452     while (total_counts<pcbddc->local_primal_size) {
2453       primal_dofs = 1;
2454       if (PetscBTLookup(change_basis,total_counts)) {
2455         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2456         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2457           primal_dofs++;
2458         }
2459         /* get constraint info */
2460         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2461         dual_dofs = size_of_constraint-primal_dofs;
2462 
2463         if (pcbddc->dbg_flag) {
2464           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2465         }
2466 
2467         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2468 
2469           /* copy quadrature constraints for change of basis check */
2470           if (pcbddc->dbg_flag) {
2471             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2472           }
2473           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2474           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2475 
2476           /* compute QR decomposition of constraints */
2477           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2478           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2479           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2480           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2481           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2482           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2483           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2484 
2485           /* explictly compute R^-T */
2486           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2487           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2488           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2489           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2490           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2491           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2492           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2493           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2494           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2495           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2496 
2497           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2499           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2500           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2501           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2502           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2503           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2504           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2505           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2506 
2507           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2508              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2509              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2510           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2512           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2513           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2514           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2516           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2517           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2518           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2519           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2520 
2521           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2522           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2523           /* insert cols for primal dofs */
2524           for (j=0;j<primal_dofs;j++) {
2525             start_vals = &qr_basis[j*size_of_constraint];
2526             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2527             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2528           }
2529           /* insert cols for dual dofs */
2530           for (j=0,k=0;j<dual_dofs;k++) {
2531             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2532               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2533               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2534               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2535               j++;
2536             }
2537           }
2538 
2539           /* check change of basis */
2540           if (pcbddc->dbg_flag) {
2541             PetscInt   ii,jj;
2542             PetscBool valid_qr=PETSC_TRUE;
2543             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2544             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2545             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2546             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2547             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2548             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2549             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2550             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2551             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2552             for (jj=0;jj<size_of_constraint;jj++) {
2553               for (ii=0;ii<primal_dofs;ii++) {
2554                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2555                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2556               }
2557             }
2558             if (!valid_qr) {
2559               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2560               for (jj=0;jj<size_of_constraint;jj++) {
2561                 for (ii=0;ii<primal_dofs;ii++) {
2562                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2563                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2564                   }
2565                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2566                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2567                   }
2568                 }
2569               }
2570             } else {
2571               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2572             }
2573           }
2574         } else { /* simple transformation block */
2575           PetscInt    row,col;
2576           PetscScalar val,norm;
2577 
2578           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2579           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2580           for (j=0;j<size_of_constraint;j++) {
2581             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2582             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2583             if (!PetscBTLookup(is_primal,row_B)) {
2584               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2585               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2586               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2587             } else {
2588               for (k=0;k<size_of_constraint;k++) {
2589                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2590                 if (row != col) {
2591                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2592                 } else {
2593                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2594                 }
2595                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2596               }
2597             }
2598           }
2599           if (pcbddc->dbg_flag) {
2600             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2601           }
2602         }
2603         /* increment primal counter */
2604         primal_counter += primal_dofs;
2605       } else {
2606         if (pcbddc->dbg_flag) {
2607           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2608         }
2609       }
2610       /* increment constraint counter total_counts */
2611       total_counts += primal_dofs;
2612     }
2613 
2614     /* free workspace */
2615     if (qr_needed) {
2616       if (pcbddc->dbg_flag) {
2617         ierr = PetscFree(work);CHKERRQ(ierr);
2618       }
2619       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2620       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2621       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2622       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2623       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2624     }
2625     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2626     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2627     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2628 
2629     /* assembling of global change of variable */
2630     {
2631       Mat tmat;
2632       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2633       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2634       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2635       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2636       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2637       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2638       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2639       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2640       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2641       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2642       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2643       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2644       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2645       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2646       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2647       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2648     }
2649     /* check */
2650     if (pcbddc->dbg_flag) {
2651       PetscReal error;
2652       Vec       x,x_change;
2653 
2654       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2655       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2656       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2657       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2658       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2659       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2660       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2661       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2662       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2663       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2664       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2665       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2666       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2667       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2668       ierr = VecDestroy(&x);CHKERRQ(ierr);
2669       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2670     }
2671 
2672     /* adapt sub_schurs computed (if any) */
2673     if (pcbddc->use_deluxe_scaling) {
2674       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2675       if (sub_schurs->n_subs_par_g) {
2676         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2677       }
2678       if (sub_schurs->S_Ej_all) {
2679         Mat S_1,S_2,tmat;
2680         IS is_all_N;
2681 
2682         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2683         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2684         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2685         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2686         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2687         sub_schurs->S_Ej_all = S_1;
2688         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2689         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2690         sub_schurs->sum_S_Ej_all = S_2;
2691         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2692       }
2693     }
2694     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2695   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2696     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2697     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2698   }
2699 
2700   /* set up change of basis context */
2701   if (pcbddc->ChangeOfBasisMatrix) {
2702     PCBDDCChange_ctx change_ctx;
2703 
2704     if (!pcbddc->new_global_mat) {
2705       PetscInt global_size,local_size;
2706 
2707       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2708       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2709       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2710       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2711       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2712       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2713       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2714       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2715       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2716     } else {
2717       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2718       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2719       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2720     }
2721     if (!pcbddc->user_ChangeOfBasisMatrix) {
2722       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2723       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2724     } else {
2725       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2726       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2727     }
2728     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2729     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2730     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2731     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2732   }
2733 
2734   /* get indices in local ordering for vertices and constraints */
2735   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2736     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2737     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2738   }
2739   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2740   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2741   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2742   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2743   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2744   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2745   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2746   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2747   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2748   /* set quantities in PCBDDC data struct */
2749   pcbddc->n_actual_vertices = i;
2750   /* check if a new primal space has been introduced */
2751   pcbddc->new_primal_space_local = PETSC_TRUE;
2752   if (olocal_primal_size == pcbddc->local_primal_size) {
2753     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2754     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2755     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2756   }
2757   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2758   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2759 
2760   /* flush dbg viewer */
2761   if (pcbddc->dbg_flag) {
2762     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2763   }
2764 
2765   /* free workspace */
2766   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2767   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2768   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2769   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2770   if (!pcbddc->adaptive_selection) {
2771     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2772     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2773   } else {
2774     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2775                       pcbddc->adaptive_constraints_ptrs,
2776                       pcbddc->adaptive_constraints_idxs,
2777                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2778     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2779   }
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2785 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2786 {
2787   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2788   PC_IS       *pcis = (PC_IS*)pc->data;
2789   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2790   PetscInt    ierr,i,vertex_size;
2791   PetscViewer viewer=pcbddc->dbg_viewer;
2792 
2793   PetscFunctionBegin;
2794   /* Reset previously computed graph */
2795   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2796   /* Init local Graph struct */
2797   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2798 
2799   /* Check validity of the csr graph passed in by the user */
2800   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2801     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2802   }
2803 
2804   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2805   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2806     Mat       mat_adj;
2807     PetscInt  *xadj,*adjncy;
2808     PetscInt  nvtxs;
2809     PetscBool flg_row=PETSC_TRUE;
2810 
2811     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2812     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2813     if (!flg_row) {
2814       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2815     }
2816     if (pcbddc->use_local_adj) {
2817       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2818       pcbddc->computed_rowadj = PETSC_TRUE;
2819     } else { /* just compute subdomain's connected components */
2820       IS                     is_dummy;
2821       ISLocalToGlobalMapping l2gmap_dummy;
2822       PetscInt               j,sum;
2823       PetscInt               *cxadj,*cadjncy;
2824       const PetscInt         *idxs;
2825       PCBDDCGraph            graph;
2826       PetscBT                is_on_boundary;
2827 
2828       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2829       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2830       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2831       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2832       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2833       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2834       graph->xadj = xadj;
2835       graph->adjncy = adjncy;
2836       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2837       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2838 
2839       if (pcbddc->dbg_flag) {
2840         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2841         for (i=0;i<graph->ncc;i++) {
2842           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2843         }
2844         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2845       }
2846 
2847       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2848       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2849       for (i=0;i<pcis->n_B;i++) {
2850         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2851       }
2852       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2853 
2854       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2855       sum = 0;
2856       for (i=0;i<graph->ncc;i++) {
2857         PetscInt sizecc = 0;
2858         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2859           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2860             sizecc++;
2861           }
2862         }
2863         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2864           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2865             cxadj[graph->queue[j]] = sizecc;
2866           }
2867         }
2868         sum += sizecc*sizecc;
2869       }
2870       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2871       sum = 0;
2872       for (i=0;i<nvtxs;i++) {
2873         PetscInt temp = cxadj[i];
2874         cxadj[i] = sum;
2875         sum += temp;
2876       }
2877       cxadj[nvtxs] = sum;
2878       for (i=0;i<graph->ncc;i++) {
2879         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2880           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2881             PetscInt k,sizecc = 0;
2882             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2883               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2884                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2885                 sizecc++;
2886               }
2887             }
2888           }
2889         }
2890       }
2891       if (nvtxs) {
2892         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2893       } else {
2894         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2895         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2896       }
2897       graph->xadj = 0;
2898       graph->adjncy = 0;
2899       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2900       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2901     }
2902     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2903     if (!flg_row) {
2904       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2905     }
2906     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2907   }
2908 
2909   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2910   vertex_size = 1;
2911   if (pcbddc->user_provided_isfordofs) {
2912     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2913       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2914       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2915         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2916         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2917       }
2918       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2919       pcbddc->n_ISForDofs = 0;
2920       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2921     }
2922     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2923     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2924   } else {
2925     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2926       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2927       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2928       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2929         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2930       }
2931     }
2932   }
2933 
2934   /* Setup of Graph */
2935   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2936     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2937   }
2938   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2939     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2940   }
2941   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2942 
2943   /* Graph's connected components analysis */
2944   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2945 
2946   /* print some info to stdout */
2947   if (pcbddc->dbg_flag) {
2948     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2949   }
2950 
2951   /* mark topography has done */
2952   pcbddc->recompute_topography = PETSC_FALSE;
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2958 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2959 {
2960   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2961   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2962   PetscErrorCode ierr;
2963 
2964   PetscFunctionBegin;
2965   n = 0;
2966   vertices = 0;
2967   if (pcbddc->ConstraintMatrix) {
2968     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2969     for (i=0;i<local_primal_size;i++) {
2970       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2971       if (size_of_constraint == 1) n++;
2972       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2973     }
2974     if (vertices_idx) {
2975       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2976       n = 0;
2977       for (i=0;i<local_primal_size;i++) {
2978         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2979         if (size_of_constraint == 1) {
2980           vertices[n++]=row_cmat_indices[0];
2981         }
2982         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2983       }
2984     }
2985   }
2986   *n_vertices = n;
2987   if (vertices_idx) *vertices_idx = vertices;
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 #undef __FUNCT__
2992 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2993 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2994 {
2995   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2996   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2997   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2998   PetscBT        touched;
2999   PetscErrorCode ierr;
3000 
3001     /* This function assumes that the number of local constraints per connected component
3002        is not greater than the number of nodes defined for the connected component
3003        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3004   PetscFunctionBegin;
3005   n = 0;
3006   constraints_index = 0;
3007   if (pcbddc->ConstraintMatrix) {
3008     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3009     max_size_of_constraint = 0;
3010     for (i=0;i<local_primal_size;i++) {
3011       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3012       if (size_of_constraint > 1) {
3013         n++;
3014       }
3015       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3016       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3017     }
3018     if (constraints_idx) {
3019       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3020       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3021       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3022       n = 0;
3023       for (i=0;i<local_primal_size;i++) {
3024         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3025         if (size_of_constraint > 1) {
3026           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3027           /* find first untouched local node */
3028           j = 0;
3029           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3030           min_index = row_cmat_global_indices[j];
3031           min_loc = j;
3032           /* search the minimum among nodes not yet touched on the connected component
3033              since there can be more than one constraint on a single cc */
3034           for (j=1;j<size_of_constraint;j++) {
3035             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3036               min_index = row_cmat_global_indices[j];
3037               min_loc = j;
3038             }
3039           }
3040           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3041           constraints_index[n++] = row_cmat_indices[min_loc];
3042         }
3043         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3044       }
3045       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3046       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3047     }
3048   }
3049   *n_constraints = n;
3050   if (constraints_idx) *constraints_idx = constraints_index;
3051   PetscFunctionReturn(0);
3052 }
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "PCBDDCSubsetNumbering"
3056 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3057 {
3058   Vec            local_vec,global_vec;
3059   IS             seqis,paris;
3060   VecScatter     scatter_ctx;
3061   PetscScalar    *array;
3062   PetscInt       *temp_global_dofs;
3063   PetscScalar    globalsum;
3064   PetscInt       i,j,s;
3065   PetscInt       nlocals,first_index,old_index,max_local;
3066   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3067   PetscMPIInt    *dof_sizes,*dof_displs;
3068   PetscBool      first_found;
3069   PetscErrorCode ierr;
3070 
3071   PetscFunctionBegin;
3072   /* mpi buffers */
3073   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3074   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3075   j = ( !rank_prec_comm ? size_prec_comm : 0);
3076   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3077   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3078   /* get maximum size of subset */
3079   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3080   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3081   max_local = 0;
3082   for (i=0;i<n_local_dofs;i++) {
3083     if (max_local < temp_global_dofs[i] ) {
3084       max_local = temp_global_dofs[i];
3085     }
3086   }
3087   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3088   max_global++;
3089   max_local = 0;
3090   for (i=0;i<n_local_dofs;i++) {
3091     if (max_local < local_dofs[i] ) {
3092       max_local = local_dofs[i];
3093     }
3094   }
3095   max_local++;
3096   /* allocate workspace */
3097   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3098   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3099   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3100   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3101   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3102   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3103   /* create scatter */
3104   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3105   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3106   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3107   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3108   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3109   /* init array */
3110   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3111   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3112   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3113   if (local_dofs_mult) {
3114     for (i=0;i<n_local_dofs;i++) {
3115       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3116     }
3117   } else {
3118     for (i=0;i<n_local_dofs;i++) {
3119       array[local_dofs[i]]=1.0;
3120     }
3121   }
3122   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3123   /* scatter into global vec and get total number of global dofs */
3124   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3127   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3128   /* Fill global_vec with cumulative function for global numbering */
3129   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3130   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3131   nlocals = 0;
3132   first_index = -1;
3133   first_found = PETSC_FALSE;
3134   for (i=0;i<s;i++) {
3135     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3136       first_found = PETSC_TRUE;
3137       first_index = i;
3138     }
3139     nlocals += (PetscInt)PetscRealPart(array[i]);
3140   }
3141   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3142   if (!rank_prec_comm) {
3143     dof_displs[0]=0;
3144     for (i=1;i<size_prec_comm;i++) {
3145       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3146     }
3147   }
3148   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3149   if (first_found) {
3150     array[first_index] += (PetscScalar)nlocals;
3151     old_index = first_index;
3152     for (i=first_index+1;i<s;i++) {
3153       if (PetscRealPart(array[i]) > 0.1) {
3154         array[i] += array[old_index];
3155         old_index = i;
3156       }
3157     }
3158   }
3159   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3160   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3161   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3162   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3163   /* get global ordering of local dofs */
3164   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3165   if (local_dofs_mult) {
3166     for (i=0;i<n_local_dofs;i++) {
3167       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3168     }
3169   } else {
3170     for (i=0;i<n_local_dofs;i++) {
3171       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3172     }
3173   }
3174   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3175   /* free workspace */
3176   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3177   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3178   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3179   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3180   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3181   /* return pointer to global ordering of local dofs */
3182   *global_numbering_subset = temp_global_dofs;
3183   PetscFunctionReturn(0);
3184 }
3185 
3186 #undef __FUNCT__
3187 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3188 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3189 {
3190   PetscInt       i,j;
3191   PetscScalar    *alphas;
3192   PetscErrorCode ierr;
3193 
3194   PetscFunctionBegin;
3195   /* this implements stabilized Gram-Schmidt */
3196   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3197   for (i=0;i<n;i++) {
3198     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3199     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3200     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3201   }
3202   ierr = PetscFree(alphas);CHKERRQ(ierr);
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatISGetSubassemblingPattern"
3208 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3209 {
3210   Mat             subdomain_adj;
3211   IS              new_ranks,ranks_send_to;
3212   MatPartitioning partitioner;
3213   Mat_IS          *matis;
3214   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3215   PetscInt        prank;
3216   PetscMPIInt     size,rank,color;
3217   PetscInt        *xadj,*adjncy,*oldranks;
3218   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3219   PetscInt        i,local_size,threshold=0;
3220   PetscErrorCode  ierr;
3221   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3222   PetscSubcomm    subcomm;
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3226   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3227   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3228 
3229   /* Get info on mapping */
3230   matis = (Mat_IS*)(mat->data);
3231   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3232   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3233 
3234   /* build local CSR graph of subdomains' connectivity */
3235   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3236   xadj[0] = 0;
3237   xadj[1] = PetscMax(n_neighs-1,0);
3238   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3239   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3240 
3241   if (threshold) {
3242     PetscInt xadj_count = 0;
3243     for (i=1;i<n_neighs;i++) {
3244       if (n_shared[i] > threshold) {
3245         adjncy[xadj_count] = neighs[i];
3246         adjncy_wgt[xadj_count] = n_shared[i];
3247         xadj_count++;
3248       }
3249     }
3250     xadj[1] = xadj_count;
3251   } else {
3252     if (xadj[1]) {
3253       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3254       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3255     }
3256   }
3257   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3258   if (use_square) {
3259     for (i=0;i<xadj[1];i++) {
3260       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3261     }
3262   }
3263   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3264 
3265   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3266 
3267   /*
3268     Restrict work on active processes only.
3269   */
3270   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3271   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3272   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3273   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3274   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3275   if (color) {
3276     ierr = PetscFree(xadj);CHKERRQ(ierr);
3277     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3278     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3279   } else {
3280     PetscInt coarsening_ratio;
3281     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3282     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3283     prank = rank;
3284     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3285     /*
3286     for (i=0;i<size;i++) {
3287       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3288     }
3289     */
3290     for (i=0;i<xadj[1];i++) {
3291       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3292     }
3293     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3294     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3295     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3296 
3297     /* Partition */
3298     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3299     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3300     if (use_vwgt) {
3301       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3302       v_wgt[0] = local_size;
3303       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3304     }
3305     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3306     coarsening_ratio = size/n_subdomains;
3307     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3308     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3309     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3310     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3311 
3312     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3313     if (contiguous) {
3314       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3315     } else {
3316       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3317     }
3318     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3319     /* clean up */
3320     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3321     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3322     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3323     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3324   }
3325   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3326 
3327   /* assemble parallel IS for sends */
3328   i = 1;
3329   if (color) i=0;
3330   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3331 
3332   /* get back IS */
3333   *is_sends = ranks_send_to;
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3338 
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatISSubassemble"
3341 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3342 {
3343   Mat                    local_mat;
3344   Mat_IS                 *matis;
3345   IS                     is_sends_internal;
3346   PetscInt               rows,cols,new_local_rows;
3347   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3348   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3349   ISLocalToGlobalMapping l2gmap;
3350   PetscInt*              l2gmap_indices;
3351   const PetscInt*        is_indices;
3352   MatType                new_local_type;
3353   /* buffers */
3354   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3355   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3356   PetscInt               *recv_buffer_idxs_local;
3357   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3358   /* MPI */
3359   MPI_Comm               comm,comm_n;
3360   PetscSubcomm           subcomm;
3361   PetscMPIInt            n_sends,n_recvs,commsize;
3362   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3363   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3364   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3365   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3366   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3367   PetscErrorCode         ierr;
3368 
3369   PetscFunctionBegin;
3370   /* TODO: add missing checks */
3371   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3372   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3373   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3374   PetscValidLogicalCollectiveInt(mat,nis,7);
3375   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3376   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3377   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3378   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3379   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3380   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3381   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3382   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3383     PetscInt mrows,mcols,mnrows,mncols;
3384     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3385     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3386     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3387     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3388     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3389     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3390   }
3391   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3392   PetscValidLogicalCollectiveInt(mat,bs,0);
3393   /* prepare IS for sending if not provided */
3394   if (!is_sends) {
3395     PetscBool pcontig = PETSC_TRUE;
3396     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3397     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3398   } else {
3399     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3400     is_sends_internal = is_sends;
3401   }
3402 
3403   /* get pointer of MATIS data */
3404   matis = (Mat_IS*)mat->data;
3405 
3406   /* get comm */
3407   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3408 
3409   /* compute number of sends */
3410   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3411   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3412 
3413   /* compute number of receives */
3414   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3415   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3416   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3417   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3418   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3419   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3420   ierr = PetscFree(iflags);CHKERRQ(ierr);
3421 
3422   /* restrict comm if requested */
3423   subcomm = 0;
3424   destroy_mat = PETSC_FALSE;
3425   if (restrict_comm) {
3426     PetscMPIInt color,rank,subcommsize;
3427     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3428     color = 0;
3429     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3430     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3431     subcommsize = commsize - subcommsize;
3432     /* check if reuse has been requested */
3433     if (reuse == MAT_REUSE_MATRIX) {
3434       if (*mat_n) {
3435         PetscMPIInt subcommsize2;
3436         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3437         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3438         comm_n = PetscObjectComm((PetscObject)*mat_n);
3439       } else {
3440         comm_n = PETSC_COMM_SELF;
3441       }
3442     } else { /* MAT_INITIAL_MATRIX */
3443       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3444       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3445       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3446       comm_n = subcomm->comm;
3447     }
3448     /* flag to destroy *mat_n if not significative */
3449     if (color) destroy_mat = PETSC_TRUE;
3450   } else {
3451     comm_n = comm;
3452   }
3453 
3454   /* prepare send/receive buffers */
3455   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3456   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3457   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3458   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3459   if (nis) {
3460     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3461   }
3462 
3463   /* Get data from local matrices */
3464   if (!isdense) {
3465     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3466     /* TODO: See below some guidelines on how to prepare the local buffers */
3467     /*
3468        send_buffer_vals should contain the raw values of the local matrix
3469        send_buffer_idxs should contain:
3470        - MatType_PRIVATE type
3471        - PetscInt        size_of_l2gmap
3472        - PetscInt        global_row_indices[size_of_l2gmap]
3473        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3474     */
3475   } else {
3476     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3477     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3478     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3479     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3480     send_buffer_idxs[1] = i;
3481     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3482     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3483     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3484     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3485     for (i=0;i<n_sends;i++) {
3486       ilengths_vals[is_indices[i]] = len*len;
3487       ilengths_idxs[is_indices[i]] = len+2;
3488     }
3489   }
3490   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3491   /* additional is (if any) */
3492   if (nis) {
3493     PetscMPIInt psum;
3494     PetscInt j;
3495     for (j=0,psum=0;j<nis;j++) {
3496       PetscInt plen;
3497       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3498       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3499       psum += len+1; /* indices + lenght */
3500     }
3501     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3502     for (j=0,psum=0;j<nis;j++) {
3503       PetscInt plen;
3504       const PetscInt *is_array_idxs;
3505       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3506       send_buffer_idxs_is[psum] = plen;
3507       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3508       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3509       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3510       psum += plen+1; /* indices + lenght */
3511     }
3512     for (i=0;i<n_sends;i++) {
3513       ilengths_idxs_is[is_indices[i]] = psum;
3514     }
3515     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3516   }
3517 
3518   buf_size_idxs = 0;
3519   buf_size_vals = 0;
3520   buf_size_idxs_is = 0;
3521   for (i=0;i<n_recvs;i++) {
3522     buf_size_idxs += (PetscInt)olengths_idxs[i];
3523     buf_size_vals += (PetscInt)olengths_vals[i];
3524     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3525   }
3526   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3527   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3528   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3529 
3530   /* get new tags for clean communications */
3531   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3532   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3533   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3534 
3535   /* allocate for requests */
3536   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3537   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3538   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3539   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3540   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3541   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3542 
3543   /* communications */
3544   ptr_idxs = recv_buffer_idxs;
3545   ptr_vals = recv_buffer_vals;
3546   ptr_idxs_is = recv_buffer_idxs_is;
3547   for (i=0;i<n_recvs;i++) {
3548     source_dest = onodes[i];
3549     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3550     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3551     ptr_idxs += olengths_idxs[i];
3552     ptr_vals += olengths_vals[i];
3553     if (nis) {
3554       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3555       ptr_idxs_is += olengths_idxs_is[i];
3556     }
3557   }
3558   for (i=0;i<n_sends;i++) {
3559     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3560     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3561     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3562     if (nis) {
3563       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3564     }
3565   }
3566   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3567   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3568 
3569   /* assemble new l2g map */
3570   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3571   ptr_idxs = recv_buffer_idxs;
3572   new_local_rows = 0;
3573   for (i=0;i<n_recvs;i++) {
3574     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3575     ptr_idxs += olengths_idxs[i];
3576   }
3577   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3578   ptr_idxs = recv_buffer_idxs;
3579   new_local_rows = 0;
3580   for (i=0;i<n_recvs;i++) {
3581     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3582     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3583     ptr_idxs += olengths_idxs[i];
3584   }
3585   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3586   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3587   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3588 
3589   /* infer new local matrix type from received local matrices type */
3590   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3591   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3592   if (n_recvs) {
3593     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3594     ptr_idxs = recv_buffer_idxs;
3595     for (i=0;i<n_recvs;i++) {
3596       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3597         new_local_type_private = MATAIJ_PRIVATE;
3598         break;
3599       }
3600       ptr_idxs += olengths_idxs[i];
3601     }
3602     switch (new_local_type_private) {
3603       case MATDENSE_PRIVATE:
3604         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3605           new_local_type = MATSEQAIJ;
3606           bs = 1;
3607         } else { /* if I receive only 1 dense matrix */
3608           new_local_type = MATSEQDENSE;
3609           bs = 1;
3610         }
3611         break;
3612       case MATAIJ_PRIVATE:
3613         new_local_type = MATSEQAIJ;
3614         bs = 1;
3615         break;
3616       case MATBAIJ_PRIVATE:
3617         new_local_type = MATSEQBAIJ;
3618         break;
3619       case MATSBAIJ_PRIVATE:
3620         new_local_type = MATSEQSBAIJ;
3621         break;
3622       default:
3623         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3624         break;
3625     }
3626   } else { /* by default, new_local_type is seqdense */
3627     new_local_type = MATSEQDENSE;
3628     bs = 1;
3629   }
3630 
3631   /* create MATIS object if needed */
3632   if (reuse == MAT_INITIAL_MATRIX) {
3633     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3634     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3635   } else {
3636     /* it also destroys the local matrices */
3637     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3638   }
3639   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3640   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3641 
3642   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3643 
3644   /* Global to local map of received indices */
3645   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3646   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3647   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3648 
3649   /* restore attributes -> type of incoming data and its size */
3650   buf_size_idxs = 0;
3651   for (i=0;i<n_recvs;i++) {
3652     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3653     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3654     buf_size_idxs += (PetscInt)olengths_idxs[i];
3655   }
3656   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3657 
3658   /* set preallocation */
3659   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3660   if (!newisdense) {
3661     PetscInt *new_local_nnz=0;
3662 
3663     ptr_vals = recv_buffer_vals;
3664     ptr_idxs = recv_buffer_idxs_local;
3665     if (n_recvs) {
3666       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3667     }
3668     for (i=0;i<n_recvs;i++) {
3669       PetscInt j;
3670       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3671         for (j=0;j<*(ptr_idxs+1);j++) {
3672           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3673         }
3674       } else {
3675         /* TODO */
3676       }
3677       ptr_idxs += olengths_idxs[i];
3678     }
3679     if (new_local_nnz) {
3680       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3681       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3682       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3683       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3684       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3685       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3686     } else {
3687       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3688     }
3689     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3690   } else {
3691     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3692   }
3693 
3694   /* set values */
3695   ptr_vals = recv_buffer_vals;
3696   ptr_idxs = recv_buffer_idxs_local;
3697   for (i=0;i<n_recvs;i++) {
3698     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3699       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3700       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3701       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3702       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3703       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3704     } else {
3705       /* TODO */
3706     }
3707     ptr_idxs += olengths_idxs[i];
3708     ptr_vals += olengths_vals[i];
3709   }
3710   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3711   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3712   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3713   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3714   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3715   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3716 
3717 #if 0
3718   if (!restrict_comm) { /* check */
3719     Vec       lvec,rvec;
3720     PetscReal infty_error;
3721 
3722     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3723     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3724     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3725     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3726     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3727     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3728     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3729     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3730     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3731   }
3732 #endif
3733 
3734   /* assemble new additional is (if any) */
3735   if (nis) {
3736     PetscInt **temp_idxs,*count_is,j,psum;
3737 
3738     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3739     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3740     ptr_idxs = recv_buffer_idxs_is;
3741     psum = 0;
3742     for (i=0;i<n_recvs;i++) {
3743       for (j=0;j<nis;j++) {
3744         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3745         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3746         psum += plen;
3747         ptr_idxs += plen+1; /* shift pointer to received data */
3748       }
3749     }
3750     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3751     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3752     for (i=1;i<nis;i++) {
3753       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3754     }
3755     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3756     ptr_idxs = recv_buffer_idxs_is;
3757     for (i=0;i<n_recvs;i++) {
3758       for (j=0;j<nis;j++) {
3759         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3760         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3761         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3762         ptr_idxs += plen+1; /* shift pointer to received data */
3763       }
3764     }
3765     for (i=0;i<nis;i++) {
3766       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3767       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3768       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3769     }
3770     ierr = PetscFree(count_is);CHKERRQ(ierr);
3771     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3772     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3773   }
3774   /* free workspace */
3775   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3776   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3777   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3778   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3779   if (isdense) {
3780     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3781     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3782   } else {
3783     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3784   }
3785   if (nis) {
3786     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3787     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3788   }
3789   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3790   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3791   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3792   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3793   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3794   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3795   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3796   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3797   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3798   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3799   ierr = PetscFree(onodes);CHKERRQ(ierr);
3800   if (nis) {
3801     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3802     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3803     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3804   }
3805   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3806   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3807     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3808     for (i=0;i<nis;i++) {
3809       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3810     }
3811   }
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 /* temporary hack into ksp private data structure */
3816 #include <petsc-private/kspimpl.h>
3817 
3818 #undef __FUNCT__
3819 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3820 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3821 {
3822   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3823   PC_IS                  *pcis = (PC_IS*)pc->data;
3824   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3825   MatNullSpace           CoarseNullSpace=NULL;
3826   ISLocalToGlobalMapping coarse_islg;
3827   IS                     coarse_is,*isarray;
3828   PetscInt               i,im_active=-1,active_procs=-1;
3829   PetscInt               nis,nisdofs,nisneu;
3830   PC                     pc_temp;
3831   PCType                 coarse_pc_type;
3832   KSPType                coarse_ksp_type;
3833   PetscBool              multilevel_requested,multilevel_allowed;
3834   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3835   Mat                    t_coarse_mat_is;
3836   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3837   PetscMPIInt            all_procs;
3838   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3839   PetscBool              compute_vecs = PETSC_FALSE;
3840   PetscScalar            *array;
3841   PetscErrorCode         ierr;
3842 
3843   PetscFunctionBegin;
3844   /* Assign global numbering to coarse dofs */
3845   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3846     compute_vecs = PETSC_TRUE;
3847     PetscInt ocoarse_size;
3848     ocoarse_size = pcbddc->coarse_size;
3849     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3850     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3851     /* see if we can avoid some work */
3852     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3853       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3854         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3855         coarse_reuse = PETSC_FALSE;
3856       } else { /* we can safely reuse already computed coarse matrix */
3857         coarse_reuse = PETSC_TRUE;
3858       }
3859     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3860       coarse_reuse = PETSC_FALSE;
3861     }
3862     /* reset any subassembling information */
3863     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3864     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3865   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3866     coarse_reuse = PETSC_TRUE;
3867   }
3868 
3869   /* count "active" (i.e. with positive local size) and "void" processes */
3870   im_active = !!(pcis->n);
3871   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3872   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3873   void_procs = all_procs-active_procs;
3874   csin_type_simple = PETSC_TRUE;
3875   redist = PETSC_FALSE;
3876   if (pcbddc->current_level && void_procs) {
3877     csin_ml = PETSC_TRUE;
3878     ncoarse_ml = void_procs;
3879     csin_ds = PETSC_TRUE;
3880     ncoarse_ds = void_procs;
3881   } else {
3882     csin_ml = PETSC_FALSE;
3883     ncoarse_ml = all_procs;
3884     if (void_procs) {
3885       csin_ds = PETSC_TRUE;
3886       ncoarse_ds = void_procs;
3887       csin_type_simple = PETSC_FALSE;
3888     } else {
3889       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3890         csin_ds = PETSC_TRUE;
3891         ncoarse_ds = pcbddc->redistribute_coarse;
3892         redist = PETSC_TRUE;
3893       } else {
3894         csin_ds = PETSC_FALSE;
3895         ncoarse_ds = all_procs;
3896       }
3897     }
3898   }
3899 
3900   /*
3901     test if we can go multilevel: three conditions must be satisfied:
3902     - we have not exceeded the number of levels requested
3903     - we can actually subassemble the active processes
3904     - we can find a suitable number of MPI processes where we can place the subassembled problem
3905   */
3906   multilevel_allowed = PETSC_FALSE;
3907   multilevel_requested = PETSC_FALSE;
3908   if (pcbddc->current_level < pcbddc->max_levels) {
3909     multilevel_requested = PETSC_TRUE;
3910     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3911       multilevel_allowed = PETSC_FALSE;
3912     } else {
3913       multilevel_allowed = PETSC_TRUE;
3914     }
3915   }
3916   /* determine number of process partecipating to coarse solver */
3917   if (multilevel_allowed) {
3918     ncoarse = ncoarse_ml;
3919     csin = csin_ml;
3920   } else {
3921     ncoarse = ncoarse_ds;
3922     csin = csin_ds;
3923   }
3924 
3925   /* creates temporary l2gmap and IS for coarse indexes */
3926   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3927   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3928 
3929   /* creates temporary MATIS object for coarse matrix */
3930   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3931   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3932   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3933   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3934 #if 0
3935   {
3936     PetscViewer viewer;
3937     char filename[256];
3938     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3939     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3940     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3941     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3942     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3943   }
3944 #endif
3945   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3946   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3947   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3948   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3949   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3950 
3951   /* compute dofs splitting and neumann boundaries for coarse dofs */
3952   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3953     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3954     const PetscInt         *idxs;
3955     ISLocalToGlobalMapping tmap;
3956 
3957     /* create map between primal indices (in local representative ordering) and local primal numbering */
3958     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3959     /* allocate space for temporary storage */
3960     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3961     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3962     /* allocate for IS array */
3963     nisdofs = pcbddc->n_ISForDofsLocal;
3964     nisneu = !!pcbddc->NeumannBoundariesLocal;
3965     nis = nisdofs + nisneu;
3966     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3967     /* dofs splitting */
3968     for (i=0;i<nisdofs;i++) {
3969       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3970       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3971       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3972       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3973       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3974       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3975       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3976       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3977     }
3978     /* neumann boundaries */
3979     if (pcbddc->NeumannBoundariesLocal) {
3980       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3981       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3982       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3983       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3984       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3985       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3986       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3987       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3988     }
3989     /* free memory */
3990     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3991     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3992     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3993   } else {
3994     nis = 0;
3995     nisdofs = 0;
3996     nisneu = 0;
3997     isarray = NULL;
3998   }
3999   /* destroy no longer needed map */
4000   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4001 
4002   /* restrict on coarse candidates (if needed) */
4003   coarse_mat_is = NULL;
4004   if (csin) {
4005     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4006       if (redist) {
4007         PetscMPIInt rank;
4008         PetscInt spc,n_spc_p1,dest[1];
4009 
4010         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4011         spc = all_procs/pcbddc->redistribute_coarse;
4012         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4013         if (rank > n_spc_p1*(spc+1)-1) {
4014           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4015         } else {
4016           dest[0] = rank/(spc+1);
4017         }
4018         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4019       } else {
4020         PetscInt j,tissize,*nisindices;
4021         PetscInt *coarse_candidates;
4022         const PetscInt* tisindices;
4023         /* get coarse candidates' ranks in pc communicator */
4024         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4025         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4026         for (i=0,j=0;i<all_procs;i++) {
4027           if (!coarse_candidates[i]) {
4028             coarse_candidates[j]=i;
4029             j++;
4030           }
4031         }
4032         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4033         /* get a suitable subassembling pattern */
4034         if (csin_type_simple) {
4035           PetscMPIInt rank;
4036           PetscInt    issize,isidx;
4037           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4038           if (im_active) {
4039             issize = 1;
4040             isidx = (PetscInt)rank;
4041           } else {
4042             issize = 0;
4043             isidx = -1;
4044           }
4045           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4046         } else {
4047           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4048         }
4049         if (pcbddc->dbg_flag) {
4050           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4051           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4052           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4053           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4054           for (i=0;i<j;i++) {
4055             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4056           }
4057           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4058           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4059         }
4060         /* shift the pattern on coarse candidates */
4061         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4062         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4063         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4064         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4065         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4066         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4067         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4068       }
4069     }
4070     if (pcbddc->dbg_flag) {
4071       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4072       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4073       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4074       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4075     }
4076     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4077     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4078   } else {
4079     if (pcbddc->dbg_flag) {
4080       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4081       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4082       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4083     }
4084     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4085     coarse_mat_is = t_coarse_mat_is;
4086   }
4087 
4088   /* create local to global scatters for coarse problem */
4089   if (compute_vecs) {
4090     PetscInt lrows;
4091     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4092     if (coarse_mat_is) {
4093       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4094     } else {
4095       lrows = 0;
4096     }
4097     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4098     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4099     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4100     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4101     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4102   }
4103   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4104   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4105 
4106   /* set defaults for coarse KSP and PC */
4107   if (multilevel_allowed) {
4108     coarse_ksp_type = KSPRICHARDSON;
4109     coarse_pc_type = PCBDDC;
4110   } else {
4111     coarse_ksp_type = KSPPREONLY;
4112     coarse_pc_type = PCREDUNDANT;
4113   }
4114 
4115   /* print some info if requested */
4116   if (pcbddc->dbg_flag) {
4117     if (!multilevel_allowed) {
4118       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4119       if (multilevel_requested) {
4120         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4121       } else if (pcbddc->max_levels) {
4122         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4123       }
4124       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4125     }
4126   }
4127 
4128   /* create the coarse KSP object only once with defaults */
4129   if (coarse_mat_is) {
4130     MatReuse coarse_mat_reuse;
4131     PetscViewer dbg_viewer = NULL;
4132     if (pcbddc->dbg_flag) {
4133       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4134       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4135     }
4136     if (!pcbddc->coarse_ksp) {
4137       char prefix[256],str_level[16];
4138       size_t len;
4139       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4140       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4141       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4142       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4143       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4144       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4145       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4146       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4147       /* prefix */
4148       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4149       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4150       if (!pcbddc->current_level) {
4151         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4152         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4153       } else {
4154         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4155         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4156         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4157         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4158         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4159         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4160       }
4161       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4162       /* allow user customization */
4163       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4164       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4165     }
4166 
4167     /* get some info after set from options */
4168     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4169     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4170     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4171     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4172     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4173       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4174       isbddc = PETSC_FALSE;
4175     }
4176     if (isredundant) {
4177       KSP inner_ksp;
4178       PC inner_pc;
4179       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4180       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4181       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4182     }
4183 
4184     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4185     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4186     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4187     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4188     if (nisdofs) {
4189       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4190       for (i=0;i<nisdofs;i++) {
4191         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4192       }
4193     }
4194     if (nisneu) {
4195       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4196       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4197     }
4198 
4199     /* assemble coarse matrix */
4200     if (coarse_reuse) {
4201       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4202       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4203       coarse_mat_reuse = MAT_REUSE_MATRIX;
4204     } else {
4205       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4206     }
4207     if (isbddc || isnn) {
4208       if (pcbddc->coarsening_ratio > 1) {
4209         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4210           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4211           if (pcbddc->dbg_flag) {
4212             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4213             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4214             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4215             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4216           }
4217         }
4218         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4219       } else {
4220         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4221         coarse_mat = coarse_mat_is;
4222       }
4223     } else {
4224       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4225     }
4226     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4227 
4228     /* propagate symmetry info to coarse matrix */
4229     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4230     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4231 
4232     /* set operators */
4233     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4234     if (pcbddc->dbg_flag) {
4235       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4236     }
4237   } else { /* processes non partecipating to coarse solver (if any) */
4238     coarse_mat = 0;
4239   }
4240   ierr = PetscFree(isarray);CHKERRQ(ierr);
4241 #if 0
4242   {
4243     PetscViewer viewer;
4244     char filename[256];
4245     sprintf(filename,"coarse_mat.m");
4246     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4247     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4248     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4249     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4250   }
4251 #endif
4252 
4253   /* Compute coarse null space (special handling by BDDC only) */
4254   if (pcbddc->NullSpace) {
4255     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4256   }
4257 
4258   if (pcbddc->coarse_ksp) {
4259     Vec crhs,csol;
4260     PetscBool ispreonly;
4261     if (CoarseNullSpace) {
4262       if (isbddc) {
4263         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4264       } else {
4265         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4266       }
4267     }
4268     /* setup coarse ksp */
4269     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4270     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4271     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4272     /* hack */
4273     if (!csol) {
4274       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4275     }
4276     if (!crhs) {
4277       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4278     }
4279     /* Check coarse problem if in debug mode or if solving with an iterative method */
4280     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4281     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4282       KSP       check_ksp;
4283       KSPType   check_ksp_type;
4284       PC        check_pc;
4285       Vec       check_vec,coarse_vec;
4286       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4287       PetscInt  its;
4288       PetscBool compute_eigs;
4289       PetscReal *eigs_r,*eigs_c;
4290       PetscInt  neigs;
4291       const char *prefix;
4292 
4293       /* Create ksp object suitable for estimation of extreme eigenvalues */
4294       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4295       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4296       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4297       if (ispreonly) {
4298         check_ksp_type = KSPPREONLY;
4299         compute_eigs = PETSC_FALSE;
4300       } else {
4301         check_ksp_type = KSPGMRES;
4302         compute_eigs = PETSC_TRUE;
4303       }
4304       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4305       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4306       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4307       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4308       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4309       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4310       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4311       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4312       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4313       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4314       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4315       /* create random vec */
4316       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4317       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4318       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4319       if (CoarseNullSpace) {
4320         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4321       }
4322       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4323       /* solve coarse problem */
4324       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4325       if (CoarseNullSpace) {
4326         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4327       }
4328       /* set eigenvalue estimation if preonly has not been requested */
4329       if (compute_eigs) {
4330         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4331         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4332         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4333         lambda_max = eigs_r[neigs-1];
4334         lambda_min = eigs_r[0];
4335         if (pcbddc->use_coarse_estimates) {
4336           if (lambda_max>lambda_min) {
4337             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4338             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4339           }
4340         }
4341       }
4342 
4343       /* check coarse problem residual error */
4344       if (pcbddc->dbg_flag) {
4345         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4346         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4347         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4348         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4349         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4350         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4351         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4352         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4353         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4354         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4355         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4356         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4357         if (compute_eigs) {
4358           PetscReal lambda_max_s,lambda_min_s;
4359           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4360           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4361           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4362           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4363           for (i=0;i<neigs;i++) {
4364             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4365           }
4366         }
4367         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4368         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4369       }
4370       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4371       if (compute_eigs) {
4372         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4373         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4374       }
4375     }
4376   }
4377   /* print additional info */
4378   if (pcbddc->dbg_flag) {
4379     /* waits until all processes reaches this point */
4380     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4381     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4382     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4383   }
4384 
4385   /* free memory */
4386   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4387   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4388   PetscFunctionReturn(0);
4389 }
4390 
4391 #undef __FUNCT__
4392 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4393 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4394 {
4395   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4396   PC_IS*         pcis = (PC_IS*)pc->data;
4397   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4398   PetscInt       i,coarse_size;
4399   PetscInt       *local_primal_indices;
4400   PetscErrorCode ierr;
4401 
4402   PetscFunctionBegin;
4403   /* Compute global number of coarse dofs */
4404   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4405     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4406   }
4407   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4408 
4409   /* check numbering */
4410   if (pcbddc->dbg_flag) {
4411     PetscScalar coarsesum,*array;
4412     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4413 
4414     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4415     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4416     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4417     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4418     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4419     for (i=0;i<pcbddc->local_primal_size;i++) {
4420       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4421     }
4422     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4423     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4424     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4425     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4426     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4427     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4428     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4429     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4430     for (i=0;i<pcis->n;i++) {
4431       if (array[i] == 1.0) {
4432         set_error = PETSC_TRUE;
4433         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4434       }
4435     }
4436     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4437     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4438     for (i=0;i<pcis->n;i++) {
4439       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4440     }
4441     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4442     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4443     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4444     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4445     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4446     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4447     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4448       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4449       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4450       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4451       for (i=0;i<pcbddc->local_primal_size;i++) {
4452         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4453       }
4454       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4455     }
4456     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4457     if (set_error_reduced) {
4458       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4459     }
4460   }
4461   /* get back data */
4462   *coarse_size_n = coarse_size;
4463   *local_primal_indices_n = local_primal_indices;
4464   PetscFunctionReturn(0);
4465 }
4466 
4467 #undef __FUNCT__
4468 #define __FUNCT__ "PCBDDCGlobalToLocal"
4469 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4470 {
4471   IS             localis_t;
4472   PetscInt       i,lsize,*idxs,n;
4473   PetscScalar    *vals;
4474   PetscErrorCode ierr;
4475 
4476   PetscFunctionBegin;
4477   /* get indices in local ordering exploiting local to global map */
4478   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4479   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4480   for (i=0;i<lsize;i++) vals[i] = 1.0;
4481   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4482   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4483   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4484   if (idxs) { /* multilevel guard */
4485     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4486   }
4487   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4488   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4489   ierr = PetscFree(vals);CHKERRQ(ierr);
4490   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4491   /* now compute set in local ordering */
4492   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4493   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4494   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4495   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4496   for (i=0,lsize=0;i<n;i++) {
4497     if (PetscRealPart(vals[i]) > 0.5) {
4498       lsize++;
4499     }
4500   }
4501   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4502   for (i=0,lsize=0;i<n;i++) {
4503     if (PetscRealPart(vals[i]) > 0.5) {
4504       idxs[lsize++] = i;
4505     }
4506   }
4507   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4508   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4509   *localis = localis_t;
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4514 #undef __FUNCT__
4515 #define __FUNCT__ "PCBDDCMatMult_Private"
4516 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4517 {
4518   PCBDDCChange_ctx change_ctx;
4519   PetscErrorCode   ierr;
4520 
4521   PetscFunctionBegin;
4522   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4523   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4524   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4525   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4526   PetscFunctionReturn(0);
4527 }
4528 
4529 #undef __FUNCT__
4530 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4531 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4532 {
4533   PCBDDCChange_ctx change_ctx;
4534   PetscErrorCode   ierr;
4535 
4536   PetscFunctionBegin;
4537   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4538   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4539   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4540   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4541   PetscFunctionReturn(0);
4542 }
4543 
4544 #undef __FUNCT__
4545 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4546 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4547 {
4548   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4549   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4550   PetscInt            *used_xadj,*used_adjncy;
4551   PetscBool           free_used_adj;
4552   PetscErrorCode      ierr;
4553 
4554   PetscFunctionBegin;
4555   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4556   free_used_adj = PETSC_FALSE;
4557   if (pcbddc->sub_schurs_layers == -1) {
4558     used_xadj = NULL;
4559     used_adjncy = NULL;
4560   } else {
4561     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4562       used_xadj = pcbddc->mat_graph->xadj;
4563       used_adjncy = pcbddc->mat_graph->adjncy;
4564     } else if (pcbddc->computed_rowadj) {
4565       used_xadj = pcbddc->mat_graph->xadj;
4566       used_adjncy = pcbddc->mat_graph->adjncy;
4567     } else {
4568       Mat            mat_adj;
4569       PetscBool      flg_row=PETSC_TRUE;
4570       const PetscInt *xadj,*adjncy;
4571       PetscInt       nvtxs;
4572 
4573       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4574       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4575       if (!flg_row) {
4576         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4577       }
4578       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4579       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4580       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4581       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4582       if (!flg_row) {
4583         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4584       }
4585       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4586       free_used_adj = PETSC_TRUE;
4587     }
4588   }
4589   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4590 
4591   /* free adjacency */
4592   if (free_used_adj) {
4593     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4594   }
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 #undef __FUNCT__
4599 #define __FUNCT__ "PCBDDCInitSubSchurs"
4600 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4601 {
4602   PC_IS               *pcis=(PC_IS*)pc->data;
4603   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4604   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4605   PCBDDCGraph         graph;
4606   Mat                 S_j;
4607   PetscErrorCode      ierr;
4608 
4609   PetscFunctionBegin;
4610   /* attach interface graph for determining subsets */
4611   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4612     IS verticesIS;
4613 
4614     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4615     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4616     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4617     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4618     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4619     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4620 /*
4621     if (pcbddc->dbg_flag) {
4622       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4623     }
4624 */
4625   } else {
4626     graph = pcbddc->mat_graph;
4627   }
4628 
4629   /* Create Schur complement matrix */
4630   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4631   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4632 
4633   /* sub_schurs init */
4634   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4635   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4636   /* free graph struct */
4637   if (pcbddc->sub_schurs_rebuild) {
4638     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4639   }
4640   PetscFunctionReturn(0);
4641 }
4642