xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 1b96847708efd0f7e072965c87b94dcb40c08329)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       n = PetscMax(n,nmin);
66       cum += subset_size*n;
67       cum2 += n;
68       maxneigs = PetscMax(maxneigs,n);
69     }
70   }
71 
72   if (mss) {
73     if (symmetric) {
74       PetscBLASInt B_itype = 1;
75       PetscBLASInt B_N = mss;
76       PetscScalar  zero = 0.0;
77       PetscScalar  eps = 0.0; /* dlamch? */
78 
79       B_lwork = -1;
80       S = NULL;
81       St = NULL;
82       eigs = NULL;
83       eigv = NULL;
84       B_iwork = NULL;
85       B_ifail = NULL;
86       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
87 #if defined(PETSC_USE_COMPLEX)
88       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
89 #else
90       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
91 #endif
92       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
93       ierr = PetscFPTrapPop();CHKERRQ(ierr);
94     } else {
95         /* TODO */
96     }
97   } else {
98     lwork = 0;
99   }
100 
101   nv = 0;
102   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
103     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
104   }
105   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
106   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
107                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
108 #if defined(PETSC_USE_COMPLEX)
109   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
110 #endif
111   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
112   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
113                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
114                       nv+cum,&pcbddc->adaptive_constraints_idxs,
115                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
116   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
117 
118   maxneigs = 0;
119   cum = cum2 = cumarray = 0;
120   if (sub_schurs->is_Ej_com) {
121     const PetscInt *idxs;
122 
123     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
124     for (cum=0;cum<nv;cum++) {
125       pcbddc->adaptive_constraints_n[cum] = 1;
126       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
127       pcbddc->adaptive_constraints_ptrs[cum] = cum;
128       pcbddc->adaptive_constraints_data[cum] = 1.0;
129     }
130     cum2 = cum;
131     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
132   }
133 
134   if (mss) { /* multilevel */
135     if (pcbddc->use_deluxe_scaling) {
136       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
137     } else {
138       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
139     }
140     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
141   }
142 
143   for (i=0;i<sub_schurs->n_subs;i++) {
144     PetscInt j,subset_size;
145 
146     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
147     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
148       const PetscInt *idxs;
149       PetscScalar    one = 1.0,zero=0.0;
150       PetscBLASInt   B_N;
151 
152       /* S should be copied since we need it for deluxe scaling */
153       if (symmetric) {
154         PetscInt j;
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158         for (j=0;j<subset_size;j++) {
159           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
160         }
161       } else {
162         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
163         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164       }
165       /* is always this the right matrix? */
166       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
167 
168       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
169       /* St = Starray+cumarray; */
170 
171       if (symmetric) {
172         PetscBLASInt B_itype = 1;
173         PetscBLASInt B_IL = 1, B_IU;
174         PetscScalar  eps = -1.0; /* dlamch? */
175 
176         /* ask for eigenvalues lower than thresh */
177         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
178         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
179         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
180 #if defined(PETSC_USE_COMPLEX)
181         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
182 #else
183         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
184 #endif
185         ierr = PetscFPTrapPop();CHKERRQ(ierr);
186         if (B_ierr) {
187           if (B_ierr < 0 ) {
188             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
189           } else if (B_ierr <= B_N) {
190             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
191           } else {
192             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
193           }
194         }
195 
196         if (B_neigs > nmax) {
197           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than maximum required %d.\n",PetscGlobalRank,B_neigs,nmax);
198           B_neigs = nmax;
199         }
200 
201         if (B_neigs < nmin) {
202           PetscBLASInt B_neigs2;
203 
204           B_IL = B_neigs + 1;
205           ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr);
206           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
207           if (symmetric) {
208             PetscInt j;
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212             for (j=0;j<subset_size;j++) {
213               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
214             }
215           } else {
216             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
217             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
218           }
219           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
220 #if defined(PETSC_USE_COMPLEX)
221           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
222 #else
223           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
224 #endif
225           ierr = PetscFPTrapPop();CHKERRQ(ierr);
226           B_neigs += B_neigs2;
227         }
228         if (B_ierr) {
229           if (B_ierr < 0 ) {
230             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
231           } else if (B_ierr <= B_N) {
232             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
233           } else {
234             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
235           }
236         }
237         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
238         for (j=0;j<B_neigs;j++) {
239           if (eigs[j] == 0.0) {
240             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
241           } else {
242             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
243           }
244         }
245       } else {
246           /* TODO */
247       }
248       maxneigs = PetscMax(B_neigs,maxneigs);
249       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
250 
251       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
252       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
253       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
254       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
255       for (j=0;j<B_neigs;j++) {
256 #if 0
257         {
258           PetscBLASInt Blas_N,Blas_one = 1.0;
259           PetscScalar norm;
260           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
261           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
262           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
263             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
264           } else {
265             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
266           }
267           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
268         }
269 #endif
270         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
271         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
272         cum2 += subset_size;
273       }
274       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
275     }
276     /* shift for next computation */
277     cumarray += subset_size*subset_size;
278   }
279   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
280   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
281 
282   if (mss) {
283     if (pcbddc->use_deluxe_scaling) {
284       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
285     } else {
286       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
287     }
288     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
289   }
290   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
291 #if defined(PETSC_USE_COMPLEX)
292   ierr = PetscFree(rwork);CHKERRQ(ierr);
293 #endif
294   if (pcbddc->dbg_flag) {
295     PetscInt maxneigs_r;
296     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
297     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
298   }
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "PCBDDCSetUpSolvers"
304 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
305 {
306   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
307   PetscScalar    *coarse_submat_vals;
308   PetscErrorCode ierr;
309 
310   PetscFunctionBegin;
311   /* Setup local scatters R_to_B and (optionally) R_to_D */
312   /* PCBDDCSetUpLocalWorkVectors should be called first! */
313   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
314 
315   /* Setup local neumann solver ksp_R */
316   /* PCBDDCSetUpLocalScatters should be called first! */
317   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
318 
319   /* Change global null space passed in by the user if change of basis has been requested */
320   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
321     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
322   }
323 
324   /*
325      Setup local correction and local part of coarse basis.
326      Gives back the dense local part of the coarse matrix in column major ordering
327   */
328   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
329 
330   /* Compute total number of coarse nodes and setup coarse solver */
331   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
332 
333   /* free */
334   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 #undef __FUNCT__
339 #define __FUNCT__ "PCBDDCResetCustomization"
340 PetscErrorCode PCBDDCResetCustomization(PC pc)
341 {
342   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
343   PetscErrorCode ierr;
344 
345   PetscFunctionBegin;
346   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
347   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
348   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
349   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
350   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
351   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
352   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
353   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
354   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
355   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
356   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "PCBDDCResetTopography"
362 PetscErrorCode PCBDDCResetTopography(PC pc)
363 {
364   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
365   PetscErrorCode ierr;
366 
367   PetscFunctionBegin;
368   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
369   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
370   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
371   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
372   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "PCBDDCResetSolvers"
378 PetscErrorCode PCBDDCResetSolvers(PC pc)
379 {
380   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
381   PetscScalar    *array;
382   PetscErrorCode ierr;
383 
384   PetscFunctionBegin;
385   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
386   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
387   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
388   ierr = PetscFree(array);CHKERRQ(ierr);
389   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
390   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
391   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
392   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
393   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
394   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
395   if (pcbddc->local_auxmat2) {
396     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
397     ierr = PetscFree(array);CHKERRQ(ierr);
398   }
399   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
400   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
401   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
402   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
403   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
404   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
405   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
406   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
407   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
408   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
409   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
410   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
411   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
412   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
413   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
414   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 #undef __FUNCT__
419 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
420 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
421 {
422   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
423   PC_IS          *pcis = (PC_IS*)pc->data;
424   VecType        impVecType;
425   PetscInt       n_constraints,n_R,old_size;
426   PetscErrorCode ierr;
427 
428   PetscFunctionBegin;
429   if (!pcbddc->ConstraintMatrix) {
430     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
431   }
432   /* get sizes */
433   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
434   n_R = pcis->n-pcbddc->n_actual_vertices;
435   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
436   /* local work vectors (try to avoid unneeded work)*/
437   /* R nodes */
438   old_size = -1;
439   if (pcbddc->vec1_R) {
440     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
441   }
442   if (n_R != old_size) {
443     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
444     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
445     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
446     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
447     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
448     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
449   }
450   /* local primal dofs */
451   old_size = -1;
452   if (pcbddc->vec1_P) {
453     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
454   }
455   if (pcbddc->local_primal_size != old_size) {
456     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
457     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
458     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
459     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
460   }
461   /* local explicit constraints */
462   old_size = -1;
463   if (pcbddc->vec1_C) {
464     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
465   }
466   if (n_constraints && n_constraints != old_size) {
467     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
468     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
469     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
470     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "PCBDDCSetUpCorrection"
477 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
478 {
479   PetscErrorCode         ierr;
480   /* pointers to pcis and pcbddc */
481   PC_IS*                 pcis = (PC_IS*)pc->data;
482   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
483   /* submatrices of local problem */
484   Mat                    A_RV,A_VR,A_VV;
485   /* submatrices of local coarse problem */
486   Mat                    S_VV,S_CV,S_VC,S_CC;
487   /* working matrices */
488   Mat                    C_CR;
489   /* additional working stuff */
490   PC                     pc_R;
491   Mat                    F;
492   PetscBool              isLU,isCHOL,isILU;
493 
494   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
495   PetscScalar            *work;
496   PetscInt               *idx_V_B;
497   PetscInt               n,n_vertices,n_constraints;
498   PetscInt               i,n_R,n_D,n_B;
499   PetscBool              unsymmetric_check;
500   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
501   MatType                impMatType;
502   /* some shortcuts to scalars */
503   PetscScalar            one=1.0,m_one=-1.0;
504 
505   PetscFunctionBegin;
506   /* get number of vertices (corners plus constraints with change of basis)
507      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
508   n_vertices = pcbddc->n_actual_vertices;
509   n_constraints = pcbddc->local_primal_size-n_vertices;
510   /* Set Non-overlapping dimensions */
511   n_B = pcis->n_B; n_D = pcis->n - n_B;
512   n_R = pcis->n-n_vertices;
513 
514   /* Set types for local objects needed by BDDC precondtioner */
515   impMatType = MATSEQDENSE;
516 
517   /* vertices in boundary numbering */
518   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
519   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
520   if (i != n_vertices) {
521     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
522   }
523 
524   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
525   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
526   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
527   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
528   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
529   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
530   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
531   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
532   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
533   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
534 
535   unsymmetric_check = PETSC_FALSE;
536   /* allocate workspace */
537   n = 0;
538   if (n_constraints) {
539     n += n_R*n_constraints;
540   }
541   if (n_vertices) {
542     n = PetscMax(2*n_R*n_vertices,n);
543   }
544   if (!pcbddc->issym) {
545     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
546     unsymmetric_check = PETSC_TRUE;
547   }
548   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
549 
550   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
551   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
552   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
553   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
554   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
555   if (isLU || isILU || isCHOL) {
556     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
557   } else {
558     F = NULL;
559   }
560 
561   /* Precompute stuffs needed for preprocessing and application of BDDC*/
562   if (n_constraints) {
563     Mat M1,M2,M3;
564     IS  is_aux;
565     /* see if we can save some allocations */
566     if (pcbddc->local_auxmat2) {
567       PetscInt on_R,on_constraints;
568       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
569       if (on_R != n_R || on_constraints != n_constraints) {
570         PetscScalar *marray;
571 
572         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
573         ierr = PetscFree(marray);CHKERRQ(ierr);
574         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
575         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
576       }
577     }
578     /* auxiliary matrices */
579     if (!pcbddc->local_auxmat2) {
580       PetscScalar *marray;
581 
582       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
583       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
584       marray += n_R*n_constraints;
585       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
586     }
587 
588     /* Extract constraints on R nodes: C_{CR}  */
589     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
590     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
591     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
592 
593     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
594     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
595     for (i=0;i<n_constraints;i++) {
596       const PetscScalar *row_cmat_values;
597       const PetscInt    *row_cmat_indices;
598       PetscInt          size_of_constraint,j;
599 
600       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
601       for (j=0;j<size_of_constraint;j++) {
602         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
603       }
604       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
605     }
606     if (F) {
607       Mat B;
608 
609       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
610       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
611       ierr = MatDestroy(&B);CHKERRQ(ierr);
612     } else {
613       PetscScalar *xarray;
614       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
615       for (i=0;i<n_constraints;i++) {
616         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
617         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
618         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
619         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
620         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
621       }
622       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
623     }
624 
625     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
626     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
627     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
628     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
629     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
630     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
631     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
632     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
633     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
634     ierr = MatDestroy(&M2);CHKERRQ(ierr);
635     ierr = MatDestroy(&M3);CHKERRQ(ierr);
636     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
637     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
638     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
639     ierr = MatDestroy(&M1);CHKERRQ(ierr);
640   }
641 
642   /* Get submatrices from subdomain matrix */
643   if (n_vertices) {
644     Mat       newmat;
645     IS        is_aux;
646     PetscInt  ibs,mbs;
647     PetscBool issbaij;
648 
649     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
650     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
651     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
652     if (ibs != mbs) { /* need to convert to SEQAIJ */
653       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
654       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
655       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
656       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
657       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
658     } else {
659       /* this is safe */
660       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
661       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
662       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
663         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
664         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
665         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
666         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
667       } else {
668         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
669         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
670       }
671     }
672     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
673   }
674 
675   /* Matrix of coarse basis functions (local) */
676   if (pcbddc->coarse_phi_B) {
677     PetscInt on_B,on_primal,on_D=n_D;
678     if (pcbddc->coarse_phi_D) {
679       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
680     }
681     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
682     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
683       PetscScalar *marray;
684 
685       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
686       ierr = PetscFree(marray);CHKERRQ(ierr);
687       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
688       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
689       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
690       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
691     }
692   }
693 
694   if (!pcbddc->coarse_phi_B) {
695     PetscScalar *marray;
696 
697     n = n_B*pcbddc->local_primal_size;
698     if (pcbddc->switch_static || pcbddc->dbg_flag) {
699       n += n_D*pcbddc->local_primal_size;
700     }
701     if (!pcbddc->issym) {
702       n *= 2;
703     }
704     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
705     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
706     n = n_B*pcbddc->local_primal_size;
707     if (pcbddc->switch_static || pcbddc->dbg_flag) {
708       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
709       n += n_D*pcbddc->local_primal_size;
710     }
711     if (!pcbddc->issym) {
712       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
713       if (pcbddc->switch_static || pcbddc->dbg_flag) {
714         n = n_B*pcbddc->local_primal_size;
715         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
716       }
717     } else {
718       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
719       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
720       if (pcbddc->switch_static || pcbddc->dbg_flag) {
721         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
722         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
723       }
724     }
725   }
726   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
727   /* vertices */
728   if (n_vertices) {
729     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
730     PetscScalar  *x,*y;
731     PetscBLASInt B_N,B_one = 1;
732 
733     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
734     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
735     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
736     if (F) {
737       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
738     } else {
739       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
740       for (i=0;i<n_vertices;i++) {
741         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
742         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
743         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
744         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
745         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
746       }
747       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
748     }
749     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
750     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
751     if (n_constraints) {
752       Mat B;
753       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
754       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
755       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
756       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
757       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
758       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
759       ierr = MatDestroy(&B);CHKERRQ(ierr);
760     }
761     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
762     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
763     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
764     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
765     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
766     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
767     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
768     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
769     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
770     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
771     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
772     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
773     /* coarse basis functions */
774     for (i=0;i<n_vertices;i++) {
775       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
776       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
777       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
778       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
779       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
780       y[n_B*i+idx_V_B[i]] = 1.0;
781       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
782       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
783 
784       if (pcbddc->switch_static || pcbddc->dbg_flag) {
785         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
786         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
787         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
788         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
790         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
791       }
792       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
793     }
794     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
795     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
796   }
797 
798   if (n_constraints) {
799     Mat B;
800 
801     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
802     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
803     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
804     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
805     if (n_vertices) {
806       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
807     }
808     ierr = MatDestroy(&B);CHKERRQ(ierr);
809     /* coarse basis functions */
810     for (i=0;i<n_constraints;i++) {
811       PetscScalar *y;
812 
813       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
814       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
815       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
816       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
818       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
819       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
820 
821       if (pcbddc->switch_static || pcbddc->dbg_flag) {
822         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
823         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
824         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
827         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
828       }
829       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
830     }
831   }
832 
833   /* compute other basis functions for non-symmetric problems */
834   if (!pcbddc->issym) {
835     Mat B,X;
836 
837     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
838 
839     if (n_constraints) {
840       Mat S_VCT,S_CCT,B_C;
841 
842       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
843       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
844       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
845       if (n_vertices) {
846         Mat B_V;
847 
848         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
849         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
850         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
851         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
852       }
853       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
854     }
855     if (n_vertices) {
856       Mat          A_VRT;
857       PetscBLASInt B_N,B_one = 1;
858 
859       if (!n_constraints) { /* if there are no constraints, reset work */
860         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
861       }
862       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
863       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
864       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
865       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
866     }
867 
868     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
869     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
870       for (i=0;i<pcbddc->local_primal_size;i++) {
871         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
872         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
873         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
874         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
875         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
876       }
877     } else {
878       for (i=0;i<pcbddc->local_primal_size;i++) {
879         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
880         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
881         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
882         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
883         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
884       }
885     }
886     ierr = MatDestroy(&B);CHKERRQ(ierr);
887     /* coarse basis functions */
888     for (i=0;i<pcbddc->local_primal_size;i++) {
889       PetscScalar *y;
890 
891       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
892       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
893       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
894       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896       if (i<n_vertices) {
897         y[n_B*i+idx_V_B[i]] = 1.0;
898       }
899       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
900       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
901 
902       if (pcbddc->switch_static || pcbddc->dbg_flag) {
903         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
904         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
905         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
908         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
909       }
910       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
911     }
912     ierr = MatDestroy(&X);CHKERRQ(ierr);
913   }
914 
915   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
916   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
917   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
918   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
919   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
920   /* Checking coarse_sub_mat and coarse basis functios */
921   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
922   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
923   if (pcbddc->dbg_flag) {
924     Mat         coarse_sub_mat;
925     Mat         AUXMAT,TM1,TM2,TM3,TM4;
926     Mat         coarse_phi_D,coarse_phi_B;
927     Mat         coarse_psi_D,coarse_psi_B;
928     Mat         A_II,A_BB,A_IB,A_BI;
929     MatType     checkmattype=MATSEQAIJ;
930     PetscReal   real_value;
931 
932     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
933     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
934     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
935     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
936     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
937     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
938     if (unsymmetric_check) {
939       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
940       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
941     }
942     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
943 
944     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
945     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
946     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
947     if (unsymmetric_check) {
948       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
949       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
950       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
951       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
952       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
953       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
954       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
955       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
956       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
957       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
958       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
959       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
960     } else {
961       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
962       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
963       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
964       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
965       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
966       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
967       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
968       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
969     }
970     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
971     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
972     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
973     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
974     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
975     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
976 
977     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
978     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
979     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
980     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
981     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
982     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
983     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
984     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
985     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
986     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
987     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
988     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
989     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
990     if (unsymmetric_check) {
991       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
992       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
993     }
994     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
995   }
996 
997   /* free memory */
998   ierr = PetscFree(work);CHKERRQ(ierr);
999   if (n_vertices) {
1000     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1001   }
1002   if (n_constraints) {
1003     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1004   }
1005   /* get back data */
1006   *coarse_submat_vals_n = coarse_submat_vals;
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1012 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1013 {
1014   Mat            *work_mat;
1015   IS             isrow_s,iscol_s;
1016   PetscBool      rsorted,csorted;
1017   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1022   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1023   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1024   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1025 
1026   if (!rsorted) {
1027     const PetscInt *idxs;
1028     PetscInt *idxs_sorted,i;
1029 
1030     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1031     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1032     for (i=0;i<rsize;i++) {
1033       idxs_perm_r[i] = i;
1034     }
1035     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1036     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1037     for (i=0;i<rsize;i++) {
1038       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1039     }
1040     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1041     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1042   } else {
1043     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1044     isrow_s = isrow;
1045   }
1046 
1047   if (!csorted) {
1048     if (isrow == iscol) {
1049       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1050       iscol_s = isrow_s;
1051     } else {
1052       const PetscInt *idxs;
1053       PetscInt *idxs_sorted,i;
1054 
1055       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1056       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1057       for (i=0;i<csize;i++) {
1058         idxs_perm_c[i] = i;
1059       }
1060       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1061       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1062       for (i=0;i<csize;i++) {
1063         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1064       }
1065       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1066       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1067     }
1068   } else {
1069     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1070     iscol_s = iscol;
1071   }
1072 
1073   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1074 
1075   if (!rsorted || !csorted) {
1076     Mat      new_mat;
1077     IS       is_perm_r,is_perm_c;
1078 
1079     if (!rsorted) {
1080       PetscInt *idxs_r,i;
1081       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1082       for (i=0;i<rsize;i++) {
1083         idxs_r[idxs_perm_r[i]] = i;
1084       }
1085       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1087     } else {
1088       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1089     }
1090     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1091 
1092     if (!csorted) {
1093       if (isrow_s == iscol_s) {
1094         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1095         is_perm_c = is_perm_r;
1096       } else {
1097         PetscInt *idxs_c,i;
1098         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1099         for (i=0;i<csize;i++) {
1100           idxs_c[idxs_perm_c[i]] = i;
1101         }
1102         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1103         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1104       }
1105     } else {
1106       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1107     }
1108     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1109 
1110     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1111     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1112     work_mat[0] = new_mat;
1113     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1114     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1115   }
1116 
1117   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1118   *B = work_mat[0];
1119   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1120   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1121   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1127 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1128 {
1129   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1130   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1131   Mat            new_mat;
1132   IS             is_local,is_global;
1133   PetscInt       local_size;
1134   PetscBool      isseqaij;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1139   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1140   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1141   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1142   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1143   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1144   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1145 
1146   /* check */
1147   if (pcbddc->dbg_flag) {
1148     Vec       x,x_change;
1149     PetscReal error;
1150 
1151     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1152     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1153     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1154     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1157     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1158     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1159     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1160     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1161     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1162     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1163     ierr = VecDestroy(&x);CHKERRQ(ierr);
1164     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1165   }
1166 
1167   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1168   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1169   if (isseqaij) {
1170     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1171   } else {
1172     Mat work_mat;
1173     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1174     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1175     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1176   }
1177   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1178   /*
1179   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1180   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1181   */
1182   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1188 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1189 {
1190   PC_IS*         pcis = (PC_IS*)(pc->data);
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   IS             is_aux1,is_aux2;
1193   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1194   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1195   PetscInt       vbs,bs;
1196   PetscBT        bitmask;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   /*
1201     No need to setup local scatters if
1202       - primal space is unchanged
1203         AND
1204       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1205         AND
1206       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1207   */
1208   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1209     PetscFunctionReturn(0);
1210   }
1211   /* destroy old objects */
1212   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1213   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1214   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1215   /* Set Non-overlapping dimensions */
1216   n_B = pcis->n_B; n_D = pcis->n - n_B;
1217   n_vertices = pcbddc->n_actual_vertices;
1218   /* create auxiliary bitmask */
1219   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1220   for (i=0;i<n_vertices;i++) {
1221     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1222   }
1223 
1224   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1225   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1226   for (i=0, n_R=0; i<pcis->n; i++) {
1227     if (!PetscBTLookup(bitmask,i)) {
1228       idx_R_local[n_R] = i;
1229       n_R++;
1230     }
1231   }
1232 
1233   /* Block code */
1234   vbs = 1;
1235   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1236   if (bs>1 && !(n_vertices%bs)) {
1237     PetscBool is_blocked = PETSC_TRUE;
1238     PetscInt  *vary;
1239     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1240     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1241     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1242     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1243     for (i=0; i<n_vertices; i++) {
1244       if (vary[i]!=0 && vary[i]!=bs) {
1245         is_blocked = PETSC_FALSE;
1246         break;
1247       }
1248     }
1249     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1250       vbs = bs;
1251       for (i=0;i<n_R/vbs;i++) {
1252         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1253       }
1254     }
1255     ierr = PetscFree(vary);CHKERRQ(ierr);
1256   }
1257   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1258   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1259 
1260   /* print some info if requested */
1261   if (pcbddc->dbg_flag) {
1262     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1263     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1266     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1267     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1268     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1269     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1270   }
1271 
1272   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1273   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1274   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1275   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1276   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1277   for (i=0; i<n_D; i++) {
1278     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1279   }
1280   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1281   for (i=0, j=0; i<n_R; i++) {
1282     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1283       aux_array1[j++] = i;
1284     }
1285   }
1286   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1287   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1288   for (i=0, j=0; i<n_B; i++) {
1289     if (!PetscBTLookup(bitmask,is_indices[i])) {
1290       aux_array2[j++] = i;
1291     }
1292   }
1293   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1294   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1295   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1296   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1297   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1298 
1299   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1300     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1301     for (i=0, j=0; i<n_R; i++) {
1302       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1303         aux_array1[j++] = i;
1304       }
1305     }
1306     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1307     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1308     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1309   }
1310   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1311   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1318 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1319 {
1320   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1321   PC_IS          *pcis = (PC_IS*)pc->data;
1322   PC             pc_temp;
1323   Mat            A_RR;
1324   MatReuse       reuse;
1325   PetscScalar    m_one = -1.0;
1326   PetscReal      value;
1327   PetscInt       n_D,n_R,ibs,mbs;
1328   PetscBool      use_exact,use_exact_reduced,issbaij;
1329   PetscErrorCode ierr;
1330   /* prefixes stuff */
1331   char           dir_prefix[256],neu_prefix[256],str_level[16];
1332   size_t         len;
1333 
1334   PetscFunctionBegin;
1335 
1336   /* compute prefixes */
1337   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1338   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1339   if (!pcbddc->current_level) {
1340     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1341     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1342     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1343     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1344   } else {
1345     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1346     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1347     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1348     len -= 15; /* remove "pc_bddc_coarse_" */
1349     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1350     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1351     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1352     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1353     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1354     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1355     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1356     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1357   }
1358 
1359   /* DIRICHLET PROBLEM */
1360   if (dirichlet) {
1361     /* Matrix for Dirichlet problem is pcis->A_II */
1362     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1363     if (!pcbddc->ksp_D) { /* create object if not yet build */
1364       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1365       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1366       /* default */
1367       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1368       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1369       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1370       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1371       if (issbaij) {
1372         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1373       } else {
1374         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1375       }
1376       /* Allow user's customization */
1377       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1378       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1379     }
1380     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1381     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1382     if (!n_D) {
1383       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1384       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1385     }
1386     /* Set Up KSP for Dirichlet problem of BDDC */
1387     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1388     /* set ksp_D into pcis data */
1389     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1390     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1391     pcis->ksp_D = pcbddc->ksp_D;
1392   }
1393 
1394   /* NEUMANN PROBLEM */
1395   A_RR = 0;
1396   if (neumann) {
1397     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1398     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1399     if (pcbddc->ksp_R) { /* already created ksp */
1400       PetscInt nn_R;
1401       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1402       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1403       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1404       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1405         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1406         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1407         reuse = MAT_INITIAL_MATRIX;
1408       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1409         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1410           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1411           reuse = MAT_INITIAL_MATRIX;
1412         } else { /* safe to reuse the matrix */
1413           reuse = MAT_REUSE_MATRIX;
1414         }
1415       }
1416       /* last check */
1417       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1418         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1419         reuse = MAT_INITIAL_MATRIX;
1420       }
1421     } else { /* first time, so we need to create the matrix */
1422       reuse = MAT_INITIAL_MATRIX;
1423     }
1424     /* extract A_RR */
1425     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1426     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1427     if (ibs != mbs) {
1428       Mat newmat;
1429       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1430       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1431       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1432     } else {
1433       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1434     }
1435     if (!pcbddc->ksp_R) { /* create object if not present */
1436       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1437       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1438       /* default */
1439       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1440       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1441       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1442       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1443       if (issbaij) {
1444         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1445       } else {
1446         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1447       }
1448       /* Allow user's customization */
1449       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1450       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1451     }
1452     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1453     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1454     if (!n_R) {
1455       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1456       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1457     }
1458     /* Set Up KSP for Neumann problem of BDDC */
1459     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1460   }
1461 
1462   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1463   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1464     if (pcbddc->dbg_flag) {
1465       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1468     }
1469     if (dirichlet) { /* Dirichlet */
1470       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1471       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1472       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1473       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1474       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1475       /* need to be adapted? */
1476       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1477       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1478       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1479       /* print info */
1480       if (pcbddc->dbg_flag) {
1481         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1482         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1483       }
1484       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1485         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1486       }
1487     }
1488     if (neumann) { /* Neumann */
1489       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1490       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1491       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1492       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1493       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1494       /* need to be adapted? */
1495       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1496       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1497       /* print info */
1498       if (pcbddc->dbg_flag) {
1499         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1500         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1501       }
1502       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1503         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1504       }
1505     }
1506   }
1507   /* free Neumann problem's matrix */
1508   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1514 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1515 {
1516   PetscErrorCode ierr;
1517   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1518 
1519   PetscFunctionBegin;
1520   if (applytranspose) {
1521     if (pcbddc->local_auxmat1) {
1522       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1523       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1524     }
1525     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1526   } else {
1527     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1528     if (pcbddc->local_auxmat1) {
1529       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1530       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1531     }
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1537 #undef __FUNCT__
1538 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1539 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1540 {
1541   PetscErrorCode ierr;
1542   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1543   PC_IS*            pcis = (PC_IS*)  (pc->data);
1544   const PetscScalar zero = 0.0;
1545 
1546   PetscFunctionBegin;
1547   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1548   if (applytranspose) {
1549     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1550     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1551   } else {
1552     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1553     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1554   }
1555   /* start communications from local primal nodes to rhs of coarse solver */
1556   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1557   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1558   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1559 
1560   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1561   /* TODO remove null space when doing multilevel */
1562   if (pcbddc->coarse_ksp) {
1563     if (applytranspose) {
1564       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1565     } else {
1566       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1567     }
1568   }
1569 
1570   /* Local solution on R nodes */
1571   if (pcis->n) {
1572     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1573     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1574     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1575     if (pcbddc->switch_static) {
1576       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1577       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1578     }
1579     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1580     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1581     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583     if (pcbddc->switch_static) {
1584       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1585       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1586     }
1587   }
1588 
1589   /* communications from coarse sol to local primal nodes */
1590   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1591   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1592 
1593   /* Sum contributions from two levels */
1594   if (applytranspose) {
1595     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1597   } else {
1598     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1599     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1605 #undef __FUNCT__
1606 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1607 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1608 {
1609   PetscErrorCode ierr;
1610   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1611   PetscScalar    *array,*array2;
1612   Vec            from,to;
1613 
1614   PetscFunctionBegin;
1615   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1616     from = pcbddc->coarse_vec;
1617     to = pcbddc->vec1_P;
1618     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1619       Vec tvec;
1620       PetscInt lsize;
1621       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1622       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1623       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1624       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1625       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1626       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1627       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1628     }
1629   } else { /* from local to global -> put data in coarse right hand side */
1630     from = pcbddc->vec1_P;
1631     to = pcbddc->coarse_vec;
1632   }
1633   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1639 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1640 {
1641   PetscErrorCode ierr;
1642   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1643   PetscScalar    *array,*array2;
1644   Vec            from,to;
1645 
1646   PetscFunctionBegin;
1647   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1648     from = pcbddc->coarse_vec;
1649     to = pcbddc->vec1_P;
1650   } else { /* from local to global -> put data in coarse right hand side */
1651     from = pcbddc->vec1_P;
1652     to = pcbddc->coarse_vec;
1653   }
1654   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1655   if (smode == SCATTER_FORWARD) {
1656     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1657       Vec tvec;
1658       PetscInt lsize;
1659       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1660       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1661       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1662       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1663       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1664       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1665       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1666     }
1667   }
1668   PetscFunctionReturn(0);
1669 }
1670 
1671 /* uncomment for testing purposes */
1672 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1673 #undef __FUNCT__
1674 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1675 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1676 {
1677   PetscErrorCode    ierr;
1678   PC_IS*            pcis = (PC_IS*)(pc->data);
1679   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1680   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1681   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1682   MatType           impMatType=MATSEQAIJ;
1683   /* one and zero */
1684   PetscScalar       one=1.0,zero=0.0;
1685   /* space to store constraints and their local indices */
1686   PetscScalar       *temp_quadrature_constraint;
1687   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1688   /* iterators */
1689   PetscInt          i,j,k,total_counts,temp_start_ptr;
1690   /* stuff to store connected components stored in pcbddc->mat_graph */
1691   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1692   PetscInt          n_ISForFaces,n_ISForEdges;
1693   /* near null space stuff */
1694   MatNullSpace      nearnullsp;
1695   const Vec         *nearnullvecs;
1696   Vec               *localnearnullsp;
1697   PetscBool         nnsp_has_cnst;
1698   PetscInt          nnsp_size;
1699   PetscScalar       *array;
1700   /* BLAS integers */
1701   PetscBLASInt      lwork,lierr;
1702   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1703   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1704   /* LAPACK working arrays for SVD or POD */
1705   PetscBool         skip_lapack;
1706   PetscScalar       *work;
1707   PetscReal         *singular_vals;
1708 #if defined(PETSC_USE_COMPLEX)
1709   PetscReal         *rwork;
1710 #endif
1711 #if defined(PETSC_MISSING_LAPACK_GESVD)
1712   PetscBLASInt      Blas_one_2=1;
1713   PetscScalar       *temp_basis,*correlation_mat;
1714 #else
1715   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1716   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1717 #endif
1718   /* reuse */
1719   PetscInt          olocal_primal_size;
1720   PetscInt          *oprimal_indices_local_idxs;
1721   /* change of basis */
1722   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1723   PetscBool         boolforchange,qr_needed;
1724   PetscBT           touched,change_basis,qr_needed_idx;
1725   /* auxiliary stuff */
1726   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1727   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1728   PetscScalar       *temp_quadrature_constraint_work=NULL;
1729   /* some quantities */
1730   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1731   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1732 
1733 
1734   PetscFunctionBegin;
1735   /* Destroy Mat objects computed previously */
1736   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1737   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1738   /* TODO synch with adaptive selection */
1739   /* Get index sets for faces, edges and vertices from graph */
1740   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1741   /* free unneeded index sets */
1742   if (!pcbddc->use_vertices) {
1743     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1744   }
1745   if (!pcbddc->use_edges) {
1746     for (i=0;i<n_ISForEdges;i++) {
1747       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1748     }
1749     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1750     n_ISForEdges = 0;
1751   }
1752   if (!pcbddc->use_faces) {
1753     for (i=0;i<n_ISForFaces;i++) {
1754       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1755     }
1756     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1757     n_ISForFaces = 0;
1758   }
1759   /* HACKS (the following two blocks of code) */
1760   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1761     pcbddc->use_change_of_basis = PETSC_TRUE;
1762     if (!ISForEdges) {
1763       pcbddc->use_change_on_faces = PETSC_TRUE;
1764     }
1765   }
1766   if (pcbddc->NullSpace) {
1767     /* use_change_of_basis should be consistent among processors */
1768     PetscBool tbool[2],gbool[2];
1769     tbool [0] = pcbddc->use_change_of_basis;
1770     tbool [1] = pcbddc->use_change_on_faces;
1771     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1772     pcbddc->use_change_of_basis = gbool[0];
1773     pcbddc->use_change_on_faces = gbool[1];
1774   }
1775   /* print some info */
1776   if (pcbddc->dbg_flag) {
1777     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1778     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1779     i = 0;
1780     if (ISForVertices) {
1781       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1782     }
1783     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1784     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1785     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1786     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1787   }
1788 
1789   if (!pcbddc->adaptive_selection) {
1790     /* check if near null space is attached to global mat */
1791     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1792     if (nearnullsp) {
1793       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1794       /* remove any stored info */
1795       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1796       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1797       /* store information for BDDC solver reuse */
1798       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1799       pcbddc->onearnullspace = nearnullsp;
1800       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1801       for (i=0;i<nnsp_size;i++) {
1802         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1803       }
1804     } else { /* if near null space is not provided BDDC uses constants by default */
1805       nnsp_size = 0;
1806       nnsp_has_cnst = PETSC_TRUE;
1807     }
1808     /* get max number of constraints on a single cc */
1809     max_constraints = nnsp_size;
1810     if (nnsp_has_cnst) max_constraints++;
1811 
1812     /*
1813          Evaluate maximum storage size needed by the procedure
1814          - temp_indices will contain start index of each constraint stored as follows
1815          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1816          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1817          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1818                                                                                                                                                            */
1819     total_counts = n_ISForFaces+n_ISForEdges;
1820     total_counts *= max_constraints;
1821     n_vertices = 0;
1822     if (ISForVertices) {
1823       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1824     }
1825     total_counts += n_vertices;
1826     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1827     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1828     total_counts = 0;
1829     max_size_of_constraint = 0;
1830     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1831       if (i<n_ISForEdges) {
1832         used_IS = &ISForEdges[i];
1833       } else {
1834         used_IS = &ISForFaces[i-n_ISForEdges];
1835       }
1836       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1837       total_counts += j;
1838       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1839     }
1840     total_counts *= max_constraints;
1841     total_counts += n_vertices;
1842     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1843     /* get local part of global near null space vectors */
1844     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1845     for (k=0;k<nnsp_size;k++) {
1846       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1847       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1848       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1849     }
1850 
1851     /* whether or not to skip lapack calls */
1852     skip_lapack = PETSC_TRUE;
1853     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1854 
1855     /* allocate some auxiliary stuff */
1856     if (!skip_lapack || pcbddc->use_qr_single) {
1857       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1858     }
1859 
1860     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1861     if (!skip_lapack) {
1862       PetscScalar temp_work;
1863 
1864 #if defined(PETSC_MISSING_LAPACK_GESVD)
1865       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1866       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1867       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1868       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1869 #if defined(PETSC_USE_COMPLEX)
1870       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1871 #endif
1872       /* now we evaluate the optimal workspace using query with lwork=-1 */
1873       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1874       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1875       lwork = -1;
1876       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1877 #if !defined(PETSC_USE_COMPLEX)
1878       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1879 #else
1880       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1881 #endif
1882       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1883       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1884 #else /* on missing GESVD */
1885       /* SVD */
1886       PetscInt max_n,min_n;
1887       max_n = max_size_of_constraint;
1888       min_n = max_constraints;
1889       if (max_size_of_constraint < max_constraints) {
1890         min_n = max_size_of_constraint;
1891         max_n = max_constraints;
1892       }
1893       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1894 #if defined(PETSC_USE_COMPLEX)
1895       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1896 #endif
1897       /* now we evaluate the optimal workspace using query with lwork=-1 */
1898       lwork = -1;
1899       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1900       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1901       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1902       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1903 #if !defined(PETSC_USE_COMPLEX)
1904       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1905 #else
1906       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1907 #endif
1908       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1909       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1910 #endif /* on missing GESVD */
1911       /* Allocate optimal workspace */
1912       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1913       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1914     }
1915     /* Now we can loop on constraining sets */
1916     total_counts = 0;
1917     temp_indices[0] = 0;
1918     /* vertices */
1919     if (ISForVertices) {
1920       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1921       if (nnsp_has_cnst) { /* consider all vertices */
1922         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1923         for (i=0;i<n_vertices;i++) {
1924           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1925           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1926           total_counts++;
1927         }
1928       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1929         PetscBool used_vertex;
1930         for (i=0;i<n_vertices;i++) {
1931           used_vertex = PETSC_FALSE;
1932           k = 0;
1933           while (!used_vertex && k<nnsp_size) {
1934             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1935             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1936               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1937               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1938               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1939               total_counts++;
1940               used_vertex = PETSC_TRUE;
1941             }
1942             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1943             k++;
1944           }
1945         }
1946       }
1947       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1948       n_vertices = total_counts;
1949     }
1950 
1951     /* edges and faces */
1952     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1953       if (ncc<n_ISForEdges) {
1954         used_IS = &ISForEdges[ncc];
1955         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1956       } else {
1957         used_IS = &ISForFaces[ncc-n_ISForEdges];
1958         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1959       }
1960       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1961       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1962       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1963       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1964       /* change of basis should not be performed on local periodic nodes */
1965       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1966       if (nnsp_has_cnst) {
1967         PetscScalar quad_value;
1968         temp_constraints++;
1969         if (!pcbddc->use_nnsp_true) {
1970           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1971         } else {
1972           quad_value = 1.0;
1973         }
1974         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1975         for (j=0;j<size_of_constraint;j++) {
1976           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1977         }
1978         /* sort by global ordering if using lapack subroutines (not needed!) */
1979         if (!skip_lapack || pcbddc->use_qr_single) {
1980           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1981           for (j=0;j<size_of_constraint;j++) {
1982             permutation[j]=j;
1983           }
1984           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1985           for (j=0;j<size_of_constraint;j++) {
1986             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1987           }
1988           for (j=0;j<size_of_constraint;j++) {
1989             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1990             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1991           }
1992           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1993           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1994         }
1995         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1996         total_counts++;
1997       }
1998       for (k=0;k<nnsp_size;k++) {
1999         PetscReal real_value;
2000         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2001         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2002         for (j=0;j<size_of_constraint;j++) {
2003           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2004         }
2005         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2006         /* check if array is null on the connected component */
2007         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2008         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2009         if (real_value > 0.0) { /* keep indices and values */
2010           /* sort by global ordering if using lapack subroutines */
2011           if (!skip_lapack || pcbddc->use_qr_single) {
2012             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2013             for (j=0;j<size_of_constraint;j++) {
2014               permutation[j]=j;
2015             }
2016             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2017             for (j=0;j<size_of_constraint;j++) {
2018               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2019               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2020             }
2021             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2022             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2023           }
2024           temp_constraints++;
2025           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2026           total_counts++;
2027         }
2028       }
2029       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2030       valid_constraints = temp_constraints;
2031       if (!pcbddc->use_nnsp_true && temp_constraints) {
2032         if (temp_constraints == 1) { /* just normalize the constraint */
2033           PetscScalar norm;
2034           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2035           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2036           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2037           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2038         } else { /* perform SVD */
2039           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2040 
2041 #if defined(PETSC_MISSING_LAPACK_GESVD)
2042           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2043              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2044              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2045                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2046                 from that computed using LAPACKgesvd
2047              -> This is due to a different computation of eigenvectors in LAPACKheev
2048              -> The quality of the POD-computed basis will be the same */
2049           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2050           /* Store upper triangular part of correlation matrix */
2051           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2052           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2053           for (j=0;j<temp_constraints;j++) {
2054             for (k=0;k<j+1;k++) {
2055               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2056             }
2057           }
2058           /* compute eigenvalues and eigenvectors of correlation matrix */
2059           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2060           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2061 #if !defined(PETSC_USE_COMPLEX)
2062           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2063 #else
2064           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2065 #endif
2066           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2067           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2068           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2069           j = 0;
2070           while (j < temp_constraints && singular_vals[j] < tol) j++;
2071           total_counts = total_counts-j;
2072           valid_constraints = temp_constraints-j;
2073           /* scale and copy POD basis into used quadrature memory */
2074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2075           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2077           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2078           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2079           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2080           if (j<temp_constraints) {
2081             PetscInt ii;
2082             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2083             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2084             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2085             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2086             for (k=0;k<temp_constraints-j;k++) {
2087               for (ii=0;ii<size_of_constraint;ii++) {
2088                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2089               }
2090             }
2091           }
2092 #else  /* on missing GESVD */
2093           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2094           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2095           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2096           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2097 #if !defined(PETSC_USE_COMPLEX)
2098           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2099 #else
2100           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2101 #endif
2102           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2103           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2104           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2105           k = temp_constraints;
2106           if (k > size_of_constraint) k = size_of_constraint;
2107           j = 0;
2108           while (j < k && singular_vals[k-j-1] < tol) j++;
2109           valid_constraints = k-j;
2110           total_counts = total_counts-temp_constraints+valid_constraints;
2111 #endif /* on missing GESVD */
2112         }
2113       }
2114       /* setting change_of_basis flag is safe now */
2115       if (boolforchange) {
2116         for (j=0;j<valid_constraints;j++) {
2117           PetscBTSet(change_basis,total_counts-j-1);
2118         }
2119       }
2120     }
2121     /* free workspace */
2122     if (!skip_lapack || pcbddc->use_qr_single) {
2123       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2124     }
2125     if (!skip_lapack) {
2126       ierr = PetscFree(work);CHKERRQ(ierr);
2127 #if defined(PETSC_USE_COMPLEX)
2128       ierr = PetscFree(rwork);CHKERRQ(ierr);
2129 #endif
2130       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2131 #if defined(PETSC_MISSING_LAPACK_GESVD)
2132       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2133       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2134 #endif
2135     }
2136     for (k=0;k<nnsp_size;k++) {
2137       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2138     }
2139     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2140   } else {
2141     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2142     PetscInt        cum = 0;
2143 
2144     total_counts = 0;
2145     n_vertices = 0;
2146     if (sub_schurs->is_Ej_com) {
2147       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2148     }
2149     max_constraints = 0;
2150     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2151       total_counts += pcbddc->adaptive_constraints_n[i];
2152       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2153     }
2154     temp_indices = pcbddc->adaptive_constraints_ptrs;
2155     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2156     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2157 
2158 #if 0
2159     printf("Found %d totals\n",total_counts);
2160     for (i=0;i<total_counts;i++) {
2161       printf("const %d, start %d",i,temp_indices[i]);
2162       printf(" end %d:\n",temp_indices[i+1]);
2163       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2164         printf("  idxs %d",temp_indices_to_constraint[j]);
2165         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2166       }
2167     }
2168     for (i=0;i<n_vertices;i++) {
2169       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i+n_vertices]);
2170     }
2171     for (i=0;i<sub_schurs->n_subs;i++) {
2172       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2173     }
2174 #endif
2175 
2176     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2177     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2178     /* Change of basis */
2179     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2180     if (pcbddc->use_change_of_basis) {
2181       cum = n_vertices;
2182       for (i=0;i<sub_schurs->n_subs;i++) {
2183         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2184           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2185             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2186           }
2187         }
2188         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2189       }
2190     }
2191   }
2192 
2193   /* free index sets of faces, edges and vertices */
2194   for (i=0;i<n_ISForFaces;i++) {
2195     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2196   }
2197   if (n_ISForFaces) {
2198     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2199   }
2200   for (i=0;i<n_ISForEdges;i++) {
2201     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2202   }
2203   if (n_ISForEdges) {
2204     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2205   }
2206   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2207 
2208   /* map temp_indices_to_constraint in boundary numbering */
2209   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2210   if (i != temp_indices[total_counts]) {
2211     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2212   }
2213 
2214   /* set quantities in pcbddc data structure and store previous primal size */
2215   /* n_vertices defines the number of subdomain corners in the primal space */
2216   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2217   olocal_primal_size = pcbddc->local_primal_size;
2218   pcbddc->local_primal_size = total_counts;
2219   pcbddc->n_vertices = n_vertices;
2220   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2221 
2222   /* Create constraint matrix */
2223   /* The constraint matrix is used to compute the l2g map of primal dofs */
2224   /* so we need to set it up properly either with or without change of basis */
2225   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2226   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2227   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2228   /* array to compute a local numbering of constraints : vertices first then constraints */
2229   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2230   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2231   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2232   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2233   /* auxiliary stuff for basis change */
2234   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2235   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2236 
2237   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2238   total_primal_vertices=0;
2239   for (i=0;i<pcbddc->local_primal_size;i++) {
2240     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2241     if (size_of_constraint == 1) {
2242       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2243       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2244       aux_primal_minloc[total_primal_vertices]=0;
2245       total_primal_vertices++;
2246     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2247       PetscInt min_loc,min_index;
2248       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2249       /* find first untouched local node */
2250       k = 0;
2251       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2252       min_index = global_indices[k];
2253       min_loc = k;
2254       /* search the minimum among global nodes already untouched on the cc */
2255       for (k=1;k<size_of_constraint;k++) {
2256         /* there can be more than one constraint on a single connected component */
2257         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2258           min_index = global_indices[k];
2259           min_loc = k;
2260         }
2261       }
2262       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2263       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2264       aux_primal_minloc[total_primal_vertices]=min_loc;
2265       total_primal_vertices++;
2266     }
2267   }
2268   /* determine if a QR strategy is needed for change of basis */
2269   qr_needed = PETSC_FALSE;
2270   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2271   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2272     if (PetscBTLookup(change_basis,i)) {
2273       if (!pcbddc->use_qr_single) {
2274         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2275         j = 0;
2276         for (k=0;k<size_of_constraint;k++) {
2277           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2278             j++;
2279           }
2280         }
2281         /* found more than one primal dof on the cc */
2282         if (j > 1) {
2283           PetscBTSet(qr_needed_idx,i);
2284           qr_needed = PETSC_TRUE;
2285         }
2286       } else {
2287         PetscBTSet(qr_needed_idx,i);
2288         qr_needed = PETSC_TRUE;
2289       }
2290     }
2291   }
2292   /* free workspace */
2293   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2294 
2295   /* permute indices in order to have a sorted set of vertices */
2296   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2297 
2298   /* nonzero structure of constraint matrix */
2299   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2300   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2301   j=total_primal_vertices;
2302   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2303     if (!PetscBTLookup(change_basis,i)) {
2304       nnz[j]=temp_indices[i+1]-temp_indices[i];
2305       j++;
2306     }
2307   }
2308   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2309   ierr = PetscFree(nnz);CHKERRQ(ierr);
2310   /* set values in constraint matrix */
2311   for (i=0;i<total_primal_vertices;i++) {
2312     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2313   }
2314   total_counts = total_primal_vertices;
2315   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2316     if (!PetscBTLookup(change_basis,i)) {
2317       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2318       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2319       total_counts++;
2320     }
2321   }
2322   /* assembling */
2323   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2324   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2325   /*
2326   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2327   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2328   */
2329   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2330   if (pcbddc->use_change_of_basis) {
2331     /* dual and primal dofs on a single cc */
2332     PetscInt     dual_dofs,primal_dofs;
2333     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2334     PetscInt     primal_counter;
2335     /* working stuff for GEQRF */
2336     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2337     PetscBLASInt lqr_work;
2338     /* working stuff for UNGQR */
2339     PetscScalar  *gqr_work,lgqr_work_t;
2340     PetscBLASInt lgqr_work;
2341     /* working stuff for TRTRS */
2342     PetscScalar  *trs_rhs;
2343     PetscBLASInt Blas_NRHS;
2344     /* pointers for values insertion into change of basis matrix */
2345     PetscInt     *start_rows,*start_cols;
2346     PetscScalar  *start_vals;
2347     /* working stuff for values insertion */
2348     PetscBT      is_primal;
2349     /* matrix sizes */
2350     PetscInt     global_size,local_size;
2351     /* temporary change of basis */
2352     Mat          localChangeOfBasisMatrix;
2353 
2354     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2355     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2356     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2357     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2358 
2359     /* nonzeros for local mat */
2360     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2361     for (i=0;i<pcis->n;i++) nnz[i]=1;
2362     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2363       if (PetscBTLookup(change_basis,i)) {
2364         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2365         if (PetscBTLookup(qr_needed_idx,i)) {
2366           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2367         } else {
2368           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2369           /* get local primal index on the cc */
2370           j = 0;
2371           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2372           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2373         }
2374       }
2375     }
2376     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2377     ierr = PetscFree(nnz);CHKERRQ(ierr);
2378     /* Set initial identity in the matrix */
2379     for (i=0;i<pcis->n;i++) {
2380       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2381     }
2382 
2383     if (pcbddc->dbg_flag) {
2384       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2385       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2386     }
2387 
2388 
2389     /* Now we loop on the constraints which need a change of basis */
2390     /*
2391        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2392        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2393 
2394        Basic blocks of change of basis matrix T computed by
2395 
2396           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2397 
2398             | 1        0   ...        0         s_1/S |
2399             | 0        1   ...        0         s_2/S |
2400             |              ...                        |
2401             | 0        ...            1     s_{n-1}/S |
2402             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2403 
2404             with S = \sum_{i=1}^n s_i^2
2405             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2406                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2407 
2408           - QR decomposition of constraints otherwise
2409     */
2410     if (qr_needed) {
2411       /* space to store Q */
2412       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2413       /* first we issue queries for optimal work */
2414       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2415       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2416       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2417       lqr_work = -1;
2418       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2419       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2420       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2421       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2422       lgqr_work = -1;
2423       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2424       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2425       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2426       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2427       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2428       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2429       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2430       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2431       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2432       /* array to store scaling factors for reflectors */
2433       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2434       /* array to store rhs and solution of triangular solver */
2435       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2436       /* allocating workspace for check */
2437       if (pcbddc->dbg_flag) {
2438         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2439       }
2440     }
2441     /* array to store whether a node is primal or not */
2442     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2443     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2444     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2445     if (i != total_primal_vertices) {
2446       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2447     }
2448     for (i=0;i<total_primal_vertices;i++) {
2449       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2450     }
2451     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2452 
2453     /* loop on constraints and see whether or not they need a change of basis and compute it */
2454     /* -> using implicit ordering contained in temp_indices data */
2455     total_counts = pcbddc->n_vertices;
2456     primal_counter = total_counts;
2457     while (total_counts<pcbddc->local_primal_size) {
2458       primal_dofs = 1;
2459       if (PetscBTLookup(change_basis,total_counts)) {
2460         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2461         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2462           primal_dofs++;
2463         }
2464         /* get constraint info */
2465         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2466         dual_dofs = size_of_constraint-primal_dofs;
2467 
2468         if (pcbddc->dbg_flag) {
2469           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2470         }
2471 
2472         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2473 
2474           /* copy quadrature constraints for change of basis check */
2475           if (pcbddc->dbg_flag) {
2476             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2477           }
2478           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2479           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2480 
2481           /* compute QR decomposition of constraints */
2482           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2483           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2484           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2485           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2486           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2487           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2488           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2489 
2490           /* explictly compute R^-T */
2491           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2492           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2493           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2494           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2495           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2496           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2497           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2498           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2499           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2500           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2501 
2502           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2503           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2504           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2505           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2506           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2507           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2508           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2509           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2510           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2511 
2512           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2513              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2514              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2515           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2516           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2517           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2518           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2519           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2520           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2521           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2522           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2523           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2524           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2525 
2526           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2527           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2528           /* insert cols for primal dofs */
2529           for (j=0;j<primal_dofs;j++) {
2530             start_vals = &qr_basis[j*size_of_constraint];
2531             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2532             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2533           }
2534           /* insert cols for dual dofs */
2535           for (j=0,k=0;j<dual_dofs;k++) {
2536             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2537               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2538               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2539               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2540               j++;
2541             }
2542           }
2543 
2544           /* check change of basis */
2545           if (pcbddc->dbg_flag) {
2546             PetscInt   ii,jj;
2547             PetscBool valid_qr=PETSC_TRUE;
2548             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2549             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2550             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2551             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2552             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2553             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2554             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2555             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2556             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2557             for (jj=0;jj<size_of_constraint;jj++) {
2558               for (ii=0;ii<primal_dofs;ii++) {
2559                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2560                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2561               }
2562             }
2563             if (!valid_qr) {
2564               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2565               for (jj=0;jj<size_of_constraint;jj++) {
2566                 for (ii=0;ii<primal_dofs;ii++) {
2567                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2568                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2569                   }
2570                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2571                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2572                   }
2573                 }
2574               }
2575             } else {
2576               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2577             }
2578           }
2579         } else { /* simple transformation block */
2580           PetscInt    row,col;
2581           PetscScalar val,norm;
2582 
2583           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2584           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2585           for (j=0;j<size_of_constraint;j++) {
2586             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2587             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2588             if (!PetscBTLookup(is_primal,row_B)) {
2589               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2590               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2591               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2592             } else {
2593               for (k=0;k<size_of_constraint;k++) {
2594                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2595                 if (row != col) {
2596                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2597                 } else {
2598                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2599                 }
2600                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2601               }
2602             }
2603           }
2604           if (pcbddc->dbg_flag) {
2605             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2606           }
2607         }
2608         /* increment primal counter */
2609         primal_counter += primal_dofs;
2610       } else {
2611         if (pcbddc->dbg_flag) {
2612           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2613         }
2614       }
2615       /* increment constraint counter total_counts */
2616       total_counts += primal_dofs;
2617     }
2618 
2619     /* free workspace */
2620     if (qr_needed) {
2621       if (pcbddc->dbg_flag) {
2622         ierr = PetscFree(work);CHKERRQ(ierr);
2623       }
2624       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2625       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2626       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2627       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2628       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2629     }
2630     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2631     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2632     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2633 
2634     /* assembling of global change of variable */
2635     {
2636       Mat tmat;
2637       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2638       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2639       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2640       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2641       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2642       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2643       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2644       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2645       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2646       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2647       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2648       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2649       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2650       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2651       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2652       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2653     }
2654     /* check */
2655     if (pcbddc->dbg_flag) {
2656       PetscReal error;
2657       Vec       x,x_change;
2658 
2659       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2660       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2661       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2662       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2663       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2664       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2665       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2666       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2667       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2668       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2669       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2670       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2671       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2672       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2673       ierr = VecDestroy(&x);CHKERRQ(ierr);
2674       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2675     }
2676 
2677     /* adapt sub_schurs computed (if any) */
2678     if (pcbddc->use_deluxe_scaling) {
2679       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2680       if (sub_schurs->n_subs_par_g) {
2681         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2682       }
2683       if (sub_schurs->S_Ej_all) {
2684         Mat S_1,S_2,tmat;
2685         IS is_all_N;
2686 
2687         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2688         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2689         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2690         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2691         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2692         sub_schurs->S_Ej_all = S_1;
2693         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2694         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2695         sub_schurs->sum_S_Ej_all = S_2;
2696         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2697       }
2698     }
2699     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2700   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2701     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2702     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2703   }
2704 
2705   /* set up change of basis context */
2706   if (pcbddc->ChangeOfBasisMatrix) {
2707     PCBDDCChange_ctx change_ctx;
2708 
2709     if (!pcbddc->new_global_mat) {
2710       PetscInt global_size,local_size;
2711 
2712       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2713       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2714       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2715       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2716       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2717       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2718       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2719       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2720       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2721     } else {
2722       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2723       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2724       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2725     }
2726     if (!pcbddc->user_ChangeOfBasisMatrix) {
2727       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2728       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2729     } else {
2730       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2731       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2732     }
2733     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2734     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2735     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2736     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2737   }
2738 
2739   /* get indices in local ordering for vertices and constraints */
2740   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2741     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2742     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2743   }
2744   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2745   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2746   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2747   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2748   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2749   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2750   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2751   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2752   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2753   /* set quantities in PCBDDC data struct */
2754   pcbddc->n_actual_vertices = i;
2755   /* check if a new primal space has been introduced */
2756   pcbddc->new_primal_space_local = PETSC_TRUE;
2757   if (olocal_primal_size == pcbddc->local_primal_size) {
2758     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2759     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2760     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2761   }
2762   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2763   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2764 
2765   /* flush dbg viewer */
2766   if (pcbddc->dbg_flag) {
2767     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2768   }
2769 
2770   /* free workspace */
2771   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2772   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2773   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2774   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2775   if (!pcbddc->adaptive_selection) {
2776     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2777     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2778   } else {
2779     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2780                       pcbddc->adaptive_constraints_ptrs,
2781                       pcbddc->adaptive_constraints_idxs,
2782                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2783     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2784   }
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2790 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2791 {
2792   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2793   PC_IS       *pcis = (PC_IS*)pc->data;
2794   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2795   PetscInt    ierr,i,vertex_size;
2796   PetscViewer viewer=pcbddc->dbg_viewer;
2797 
2798   PetscFunctionBegin;
2799   /* Reset previously computed graph */
2800   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2801   /* Init local Graph struct */
2802   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2803 
2804   /* Check validity of the csr graph passed in by the user */
2805   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2806     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2807   }
2808 
2809   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2810   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2811     Mat       mat_adj;
2812     PetscInt  *xadj,*adjncy;
2813     PetscInt  nvtxs;
2814     PetscBool flg_row=PETSC_TRUE;
2815 
2816     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2817     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2818     if (!flg_row) {
2819       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2820     }
2821     if (pcbddc->use_local_adj) {
2822       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2823       pcbddc->computed_rowadj = PETSC_TRUE;
2824     } else { /* just compute subdomain's connected components */
2825       IS                     is_dummy;
2826       ISLocalToGlobalMapping l2gmap_dummy;
2827       PetscInt               j,sum;
2828       PetscInt               *cxadj,*cadjncy;
2829       const PetscInt         *idxs;
2830       PCBDDCGraph            graph;
2831       PetscBT                is_on_boundary;
2832 
2833       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2834       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2835       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2836       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2837       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2838       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2839       graph->xadj = xadj;
2840       graph->adjncy = adjncy;
2841       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2842       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2843 
2844       if (pcbddc->dbg_flag) {
2845         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2846         for (i=0;i<graph->ncc;i++) {
2847           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2848         }
2849         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2850       }
2851 
2852       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2853       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2854       for (i=0;i<pcis->n_B;i++) {
2855         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2856       }
2857       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2858 
2859       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2860       sum = 0;
2861       for (i=0;i<graph->ncc;i++) {
2862         PetscInt sizecc = 0;
2863         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2864           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2865             sizecc++;
2866           }
2867         }
2868         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2869           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2870             cxadj[graph->queue[j]] = sizecc;
2871           }
2872         }
2873         sum += sizecc*sizecc;
2874       }
2875       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2876       sum = 0;
2877       for (i=0;i<nvtxs;i++) {
2878         PetscInt temp = cxadj[i];
2879         cxadj[i] = sum;
2880         sum += temp;
2881       }
2882       cxadj[nvtxs] = sum;
2883       for (i=0;i<graph->ncc;i++) {
2884         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2885           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2886             PetscInt k,sizecc = 0;
2887             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2888               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2889                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2890                 sizecc++;
2891               }
2892             }
2893           }
2894         }
2895       }
2896       if (nvtxs) {
2897         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2898       } else {
2899         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2900         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2901       }
2902       graph->xadj = 0;
2903       graph->adjncy = 0;
2904       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2905       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2906     }
2907     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2908     if (!flg_row) {
2909       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2910     }
2911     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2912   }
2913 
2914   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2915   vertex_size = 1;
2916   if (pcbddc->user_provided_isfordofs) {
2917     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2918       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2919       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2920         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2921         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2922       }
2923       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2924       pcbddc->n_ISForDofs = 0;
2925       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2926     }
2927     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2928     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2929   } else {
2930     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2931       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2932       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2933       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2934         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2935       }
2936     }
2937   }
2938 
2939   /* Setup of Graph */
2940   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2941     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2942   }
2943   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2944     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2945   }
2946   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2947 
2948   /* Graph's connected components analysis */
2949   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2950 
2951   /* print some info to stdout */
2952   if (pcbddc->dbg_flag) {
2953     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2954   }
2955 
2956   /* mark topography has done */
2957   pcbddc->recompute_topography = PETSC_FALSE;
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2963 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2964 {
2965   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2966   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2967   PetscErrorCode ierr;
2968 
2969   PetscFunctionBegin;
2970   n = 0;
2971   vertices = 0;
2972   if (pcbddc->ConstraintMatrix) {
2973     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2974     for (i=0;i<local_primal_size;i++) {
2975       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2976       if (size_of_constraint == 1) n++;
2977       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2978     }
2979     if (vertices_idx) {
2980       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2981       n = 0;
2982       for (i=0;i<local_primal_size;i++) {
2983         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2984         if (size_of_constraint == 1) {
2985           vertices[n++]=row_cmat_indices[0];
2986         }
2987         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2988       }
2989     }
2990   }
2991   *n_vertices = n;
2992   if (vertices_idx) *vertices_idx = vertices;
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 #undef __FUNCT__
2997 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2998 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2999 {
3000   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3001   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3002   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3003   PetscBT        touched;
3004   PetscErrorCode ierr;
3005 
3006     /* This function assumes that the number of local constraints per connected component
3007        is not greater than the number of nodes defined for the connected component
3008        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3009   PetscFunctionBegin;
3010   n = 0;
3011   constraints_index = 0;
3012   if (pcbddc->ConstraintMatrix) {
3013     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3014     max_size_of_constraint = 0;
3015     for (i=0;i<local_primal_size;i++) {
3016       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3017       if (size_of_constraint > 1) {
3018         n++;
3019       }
3020       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3021       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3022     }
3023     if (constraints_idx) {
3024       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3025       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3026       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3027       n = 0;
3028       for (i=0;i<local_primal_size;i++) {
3029         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3030         if (size_of_constraint > 1) {
3031           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3032           /* find first untouched local node */
3033           j = 0;
3034           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3035           min_index = row_cmat_global_indices[j];
3036           min_loc = j;
3037           /* search the minimum among nodes not yet touched on the connected component
3038              since there can be more than one constraint on a single cc */
3039           for (j=1;j<size_of_constraint;j++) {
3040             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3041               min_index = row_cmat_global_indices[j];
3042               min_loc = j;
3043             }
3044           }
3045           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3046           constraints_index[n++] = row_cmat_indices[min_loc];
3047         }
3048         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3049       }
3050       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3051       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3052     }
3053   }
3054   *n_constraints = n;
3055   if (constraints_idx) *constraints_idx = constraints_index;
3056   PetscFunctionReturn(0);
3057 }
3058 
3059 #undef __FUNCT__
3060 #define __FUNCT__ "PCBDDCSubsetNumbering"
3061 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3062 {
3063   Vec            local_vec,global_vec;
3064   IS             seqis,paris;
3065   VecScatter     scatter_ctx;
3066   PetscScalar    *array;
3067   PetscInt       *temp_global_dofs;
3068   PetscScalar    globalsum;
3069   PetscInt       i,j,s;
3070   PetscInt       nlocals,first_index,old_index,max_local;
3071   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3072   PetscMPIInt    *dof_sizes,*dof_displs;
3073   PetscBool      first_found;
3074   PetscErrorCode ierr;
3075 
3076   PetscFunctionBegin;
3077   /* mpi buffers */
3078   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3079   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3080   j = ( !rank_prec_comm ? size_prec_comm : 0);
3081   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3082   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3083   /* get maximum size of subset */
3084   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3085   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3086   max_local = 0;
3087   for (i=0;i<n_local_dofs;i++) {
3088     if (max_local < temp_global_dofs[i] ) {
3089       max_local = temp_global_dofs[i];
3090     }
3091   }
3092   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3093   max_global++;
3094   max_local = 0;
3095   for (i=0;i<n_local_dofs;i++) {
3096     if (max_local < local_dofs[i] ) {
3097       max_local = local_dofs[i];
3098     }
3099   }
3100   max_local++;
3101   /* allocate workspace */
3102   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3103   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3104   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3105   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3106   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3107   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3108   /* create scatter */
3109   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3110   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3111   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3112   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3113   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3114   /* init array */
3115   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3116   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3117   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3118   if (local_dofs_mult) {
3119     for (i=0;i<n_local_dofs;i++) {
3120       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3121     }
3122   } else {
3123     for (i=0;i<n_local_dofs;i++) {
3124       array[local_dofs[i]]=1.0;
3125     }
3126   }
3127   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3128   /* scatter into global vec and get total number of global dofs */
3129   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3130   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3131   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3132   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3133   /* Fill global_vec with cumulative function for global numbering */
3134   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3135   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3136   nlocals = 0;
3137   first_index = -1;
3138   first_found = PETSC_FALSE;
3139   for (i=0;i<s;i++) {
3140     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3141       first_found = PETSC_TRUE;
3142       first_index = i;
3143     }
3144     nlocals += (PetscInt)PetscRealPart(array[i]);
3145   }
3146   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3147   if (!rank_prec_comm) {
3148     dof_displs[0]=0;
3149     for (i=1;i<size_prec_comm;i++) {
3150       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3151     }
3152   }
3153   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3154   if (first_found) {
3155     array[first_index] += (PetscScalar)nlocals;
3156     old_index = first_index;
3157     for (i=first_index+1;i<s;i++) {
3158       if (PetscRealPart(array[i]) > 0.1) {
3159         array[i] += array[old_index];
3160         old_index = i;
3161       }
3162     }
3163   }
3164   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3165   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3166   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3167   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3168   /* get global ordering of local dofs */
3169   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3170   if (local_dofs_mult) {
3171     for (i=0;i<n_local_dofs;i++) {
3172       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3173     }
3174   } else {
3175     for (i=0;i<n_local_dofs;i++) {
3176       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3177     }
3178   }
3179   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3180   /* free workspace */
3181   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3182   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3183   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3184   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3185   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3186   /* return pointer to global ordering of local dofs */
3187   *global_numbering_subset = temp_global_dofs;
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3193 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3194 {
3195   PetscInt       i,j;
3196   PetscScalar    *alphas;
3197   PetscErrorCode ierr;
3198 
3199   PetscFunctionBegin;
3200   /* this implements stabilized Gram-Schmidt */
3201   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3202   for (i=0;i<n;i++) {
3203     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3204     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3205     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3206   }
3207   ierr = PetscFree(alphas);CHKERRQ(ierr);
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatISGetSubassemblingPattern"
3213 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3214 {
3215   Mat             subdomain_adj;
3216   IS              new_ranks,ranks_send_to;
3217   MatPartitioning partitioner;
3218   Mat_IS          *matis;
3219   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3220   PetscInt        prank;
3221   PetscMPIInt     size,rank,color;
3222   PetscInt        *xadj,*adjncy,*oldranks;
3223   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3224   PetscInt        i,local_size,threshold=0;
3225   PetscErrorCode  ierr;
3226   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3227   PetscSubcomm    subcomm;
3228 
3229   PetscFunctionBegin;
3230   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3231   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3232   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3233 
3234   /* Get info on mapping */
3235   matis = (Mat_IS*)(mat->data);
3236   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3237   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3238 
3239   /* build local CSR graph of subdomains' connectivity */
3240   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3241   xadj[0] = 0;
3242   xadj[1] = PetscMax(n_neighs-1,0);
3243   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3244   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3245 
3246   if (threshold) {
3247     PetscInt xadj_count = 0;
3248     for (i=1;i<n_neighs;i++) {
3249       if (n_shared[i] > threshold) {
3250         adjncy[xadj_count] = neighs[i];
3251         adjncy_wgt[xadj_count] = n_shared[i];
3252         xadj_count++;
3253       }
3254     }
3255     xadj[1] = xadj_count;
3256   } else {
3257     if (xadj[1]) {
3258       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3259       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3260     }
3261   }
3262   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3263   if (use_square) {
3264     for (i=0;i<xadj[1];i++) {
3265       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3266     }
3267   }
3268   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3269 
3270   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3271 
3272   /*
3273     Restrict work on active processes only.
3274   */
3275   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3276   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3277   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3278   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3279   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3280   if (color) {
3281     ierr = PetscFree(xadj);CHKERRQ(ierr);
3282     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3283     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3284   } else {
3285     PetscInt coarsening_ratio;
3286     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3287     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3288     prank = rank;
3289     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3290     /*
3291     for (i=0;i<size;i++) {
3292       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3293     }
3294     */
3295     for (i=0;i<xadj[1];i++) {
3296       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3297     }
3298     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3299     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3300     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3301 
3302     /* Partition */
3303     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3304     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3305     if (use_vwgt) {
3306       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3307       v_wgt[0] = local_size;
3308       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3309     }
3310     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3311     coarsening_ratio = size/n_subdomains;
3312     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3313     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3314     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3315     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3316 
3317     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3318     if (contiguous) {
3319       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3320     } else {
3321       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3322     }
3323     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3324     /* clean up */
3325     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3326     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3327     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3328     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3329   }
3330   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3331 
3332   /* assemble parallel IS for sends */
3333   i = 1;
3334   if (color) i=0;
3335   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3336 
3337   /* get back IS */
3338   *is_sends = ranks_send_to;
3339   PetscFunctionReturn(0);
3340 }
3341 
3342 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3343 
3344 #undef __FUNCT__
3345 #define __FUNCT__ "MatISSubassemble"
3346 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3347 {
3348   Mat                    local_mat;
3349   Mat_IS                 *matis;
3350   IS                     is_sends_internal;
3351   PetscInt               rows,cols,new_local_rows;
3352   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3353   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3354   ISLocalToGlobalMapping l2gmap;
3355   PetscInt*              l2gmap_indices;
3356   const PetscInt*        is_indices;
3357   MatType                new_local_type;
3358   /* buffers */
3359   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3360   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3361   PetscInt               *recv_buffer_idxs_local;
3362   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3363   /* MPI */
3364   MPI_Comm               comm,comm_n;
3365   PetscSubcomm           subcomm;
3366   PetscMPIInt            n_sends,n_recvs,commsize;
3367   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3368   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3369   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3370   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3371   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3372   PetscErrorCode         ierr;
3373 
3374   PetscFunctionBegin;
3375   /* TODO: add missing checks */
3376   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3377   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3378   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3379   PetscValidLogicalCollectiveInt(mat,nis,7);
3380   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3381   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3382   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3383   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3384   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3385   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3386   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3387   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3388     PetscInt mrows,mcols,mnrows,mncols;
3389     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3390     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3391     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3392     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3393     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3394     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3395   }
3396   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3397   PetscValidLogicalCollectiveInt(mat,bs,0);
3398   /* prepare IS for sending if not provided */
3399   if (!is_sends) {
3400     PetscBool pcontig = PETSC_TRUE;
3401     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3402     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3403   } else {
3404     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3405     is_sends_internal = is_sends;
3406   }
3407 
3408   /* get pointer of MATIS data */
3409   matis = (Mat_IS*)mat->data;
3410 
3411   /* get comm */
3412   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3413 
3414   /* compute number of sends */
3415   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3416   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3417 
3418   /* compute number of receives */
3419   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3420   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3421   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3422   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3423   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3424   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3425   ierr = PetscFree(iflags);CHKERRQ(ierr);
3426 
3427   /* restrict comm if requested */
3428   subcomm = 0;
3429   destroy_mat = PETSC_FALSE;
3430   if (restrict_comm) {
3431     PetscMPIInt color,rank,subcommsize;
3432     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3433     color = 0;
3434     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3435     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3436     subcommsize = commsize - subcommsize;
3437     /* check if reuse has been requested */
3438     if (reuse == MAT_REUSE_MATRIX) {
3439       if (*mat_n) {
3440         PetscMPIInt subcommsize2;
3441         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3442         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3443         comm_n = PetscObjectComm((PetscObject)*mat_n);
3444       } else {
3445         comm_n = PETSC_COMM_SELF;
3446       }
3447     } else { /* MAT_INITIAL_MATRIX */
3448       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3449       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3450       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3451       comm_n = subcomm->comm;
3452     }
3453     /* flag to destroy *mat_n if not significative */
3454     if (color) destroy_mat = PETSC_TRUE;
3455   } else {
3456     comm_n = comm;
3457   }
3458 
3459   /* prepare send/receive buffers */
3460   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3461   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3462   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3463   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3464   if (nis) {
3465     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3466   }
3467 
3468   /* Get data from local matrices */
3469   if (!isdense) {
3470     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3471     /* TODO: See below some guidelines on how to prepare the local buffers */
3472     /*
3473        send_buffer_vals should contain the raw values of the local matrix
3474        send_buffer_idxs should contain:
3475        - MatType_PRIVATE type
3476        - PetscInt        size_of_l2gmap
3477        - PetscInt        global_row_indices[size_of_l2gmap]
3478        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3479     */
3480   } else {
3481     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3482     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3483     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3484     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3485     send_buffer_idxs[1] = i;
3486     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3487     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3488     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3489     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3490     for (i=0;i<n_sends;i++) {
3491       ilengths_vals[is_indices[i]] = len*len;
3492       ilengths_idxs[is_indices[i]] = len+2;
3493     }
3494   }
3495   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3496   /* additional is (if any) */
3497   if (nis) {
3498     PetscMPIInt psum;
3499     PetscInt j;
3500     for (j=0,psum=0;j<nis;j++) {
3501       PetscInt plen;
3502       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3503       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3504       psum += len+1; /* indices + lenght */
3505     }
3506     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3507     for (j=0,psum=0;j<nis;j++) {
3508       PetscInt plen;
3509       const PetscInt *is_array_idxs;
3510       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3511       send_buffer_idxs_is[psum] = plen;
3512       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3513       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3514       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3515       psum += plen+1; /* indices + lenght */
3516     }
3517     for (i=0;i<n_sends;i++) {
3518       ilengths_idxs_is[is_indices[i]] = psum;
3519     }
3520     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3521   }
3522 
3523   buf_size_idxs = 0;
3524   buf_size_vals = 0;
3525   buf_size_idxs_is = 0;
3526   for (i=0;i<n_recvs;i++) {
3527     buf_size_idxs += (PetscInt)olengths_idxs[i];
3528     buf_size_vals += (PetscInt)olengths_vals[i];
3529     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3530   }
3531   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3532   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3533   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3534 
3535   /* get new tags for clean communications */
3536   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3537   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3538   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3539 
3540   /* allocate for requests */
3541   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3542   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3543   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3544   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3545   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3546   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3547 
3548   /* communications */
3549   ptr_idxs = recv_buffer_idxs;
3550   ptr_vals = recv_buffer_vals;
3551   ptr_idxs_is = recv_buffer_idxs_is;
3552   for (i=0;i<n_recvs;i++) {
3553     source_dest = onodes[i];
3554     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3555     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3556     ptr_idxs += olengths_idxs[i];
3557     ptr_vals += olengths_vals[i];
3558     if (nis) {
3559       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3560       ptr_idxs_is += olengths_idxs_is[i];
3561     }
3562   }
3563   for (i=0;i<n_sends;i++) {
3564     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3565     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3566     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3567     if (nis) {
3568       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3569     }
3570   }
3571   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3572   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3573 
3574   /* assemble new l2g map */
3575   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3576   ptr_idxs = recv_buffer_idxs;
3577   new_local_rows = 0;
3578   for (i=0;i<n_recvs;i++) {
3579     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3580     ptr_idxs += olengths_idxs[i];
3581   }
3582   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3583   ptr_idxs = recv_buffer_idxs;
3584   new_local_rows = 0;
3585   for (i=0;i<n_recvs;i++) {
3586     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3587     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3588     ptr_idxs += olengths_idxs[i];
3589   }
3590   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3591   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3592   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3593 
3594   /* infer new local matrix type from received local matrices type */
3595   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3596   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3597   if (n_recvs) {
3598     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3599     ptr_idxs = recv_buffer_idxs;
3600     for (i=0;i<n_recvs;i++) {
3601       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3602         new_local_type_private = MATAIJ_PRIVATE;
3603         break;
3604       }
3605       ptr_idxs += olengths_idxs[i];
3606     }
3607     switch (new_local_type_private) {
3608       case MATDENSE_PRIVATE:
3609         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3610           new_local_type = MATSEQAIJ;
3611           bs = 1;
3612         } else { /* if I receive only 1 dense matrix */
3613           new_local_type = MATSEQDENSE;
3614           bs = 1;
3615         }
3616         break;
3617       case MATAIJ_PRIVATE:
3618         new_local_type = MATSEQAIJ;
3619         bs = 1;
3620         break;
3621       case MATBAIJ_PRIVATE:
3622         new_local_type = MATSEQBAIJ;
3623         break;
3624       case MATSBAIJ_PRIVATE:
3625         new_local_type = MATSEQSBAIJ;
3626         break;
3627       default:
3628         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3629         break;
3630     }
3631   } else { /* by default, new_local_type is seqdense */
3632     new_local_type = MATSEQDENSE;
3633     bs = 1;
3634   }
3635 
3636   /* create MATIS object if needed */
3637   if (reuse == MAT_INITIAL_MATRIX) {
3638     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3639     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3640   } else {
3641     /* it also destroys the local matrices */
3642     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3643   }
3644   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3645   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3646 
3647   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3648 
3649   /* Global to local map of received indices */
3650   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3651   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3652   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3653 
3654   /* restore attributes -> type of incoming data and its size */
3655   buf_size_idxs = 0;
3656   for (i=0;i<n_recvs;i++) {
3657     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3658     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3659     buf_size_idxs += (PetscInt)olengths_idxs[i];
3660   }
3661   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3662 
3663   /* set preallocation */
3664   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3665   if (!newisdense) {
3666     PetscInt *new_local_nnz=0;
3667 
3668     ptr_vals = recv_buffer_vals;
3669     ptr_idxs = recv_buffer_idxs_local;
3670     if (n_recvs) {
3671       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3672     }
3673     for (i=0;i<n_recvs;i++) {
3674       PetscInt j;
3675       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3676         for (j=0;j<*(ptr_idxs+1);j++) {
3677           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3678         }
3679       } else {
3680         /* TODO */
3681       }
3682       ptr_idxs += olengths_idxs[i];
3683     }
3684     if (new_local_nnz) {
3685       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3686       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3687       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3688       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3689       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3690       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3691     } else {
3692       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3693     }
3694     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3695   } else {
3696     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3697   }
3698 
3699   /* set values */
3700   ptr_vals = recv_buffer_vals;
3701   ptr_idxs = recv_buffer_idxs_local;
3702   for (i=0;i<n_recvs;i++) {
3703     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3704       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3705       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3706       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3707       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3708       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3709     } else {
3710       /* TODO */
3711     }
3712     ptr_idxs += olengths_idxs[i];
3713     ptr_vals += olengths_vals[i];
3714   }
3715   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3716   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3717   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3718   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3719   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3720   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3721 
3722 #if 0
3723   if (!restrict_comm) { /* check */
3724     Vec       lvec,rvec;
3725     PetscReal infty_error;
3726 
3727     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3728     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3729     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3730     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3731     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3732     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3733     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3734     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3735     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3736   }
3737 #endif
3738 
3739   /* assemble new additional is (if any) */
3740   if (nis) {
3741     PetscInt **temp_idxs,*count_is,j,psum;
3742 
3743     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3744     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3745     ptr_idxs = recv_buffer_idxs_is;
3746     psum = 0;
3747     for (i=0;i<n_recvs;i++) {
3748       for (j=0;j<nis;j++) {
3749         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3750         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3751         psum += plen;
3752         ptr_idxs += plen+1; /* shift pointer to received data */
3753       }
3754     }
3755     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3756     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3757     for (i=1;i<nis;i++) {
3758       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3759     }
3760     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3761     ptr_idxs = recv_buffer_idxs_is;
3762     for (i=0;i<n_recvs;i++) {
3763       for (j=0;j<nis;j++) {
3764         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3765         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3766         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3767         ptr_idxs += plen+1; /* shift pointer to received data */
3768       }
3769     }
3770     for (i=0;i<nis;i++) {
3771       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3772       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3773       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3774     }
3775     ierr = PetscFree(count_is);CHKERRQ(ierr);
3776     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3777     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3778   }
3779   /* free workspace */
3780   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3781   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3782   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3783   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3784   if (isdense) {
3785     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3786     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3787   } else {
3788     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3789   }
3790   if (nis) {
3791     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3792     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3793   }
3794   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3795   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3796   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3797   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3798   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3799   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3800   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3801   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3802   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3803   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3804   ierr = PetscFree(onodes);CHKERRQ(ierr);
3805   if (nis) {
3806     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3807     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3808     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3809   }
3810   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3811   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3812     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3813     for (i=0;i<nis;i++) {
3814       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3815     }
3816   }
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 /* temporary hack into ksp private data structure */
3821 #include <petsc-private/kspimpl.h>
3822 
3823 #undef __FUNCT__
3824 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3825 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3826 {
3827   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3828   PC_IS                  *pcis = (PC_IS*)pc->data;
3829   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3830   MatNullSpace           CoarseNullSpace=NULL;
3831   ISLocalToGlobalMapping coarse_islg;
3832   IS                     coarse_is,*isarray;
3833   PetscInt               i,im_active=-1,active_procs=-1;
3834   PetscInt               nis,nisdofs,nisneu;
3835   PC                     pc_temp;
3836   PCType                 coarse_pc_type;
3837   KSPType                coarse_ksp_type;
3838   PetscBool              multilevel_requested,multilevel_allowed;
3839   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3840   Mat                    t_coarse_mat_is;
3841   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3842   PetscMPIInt            all_procs;
3843   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3844   PetscBool              compute_vecs = PETSC_FALSE;
3845   PetscScalar            *array;
3846   PetscErrorCode         ierr;
3847 
3848   PetscFunctionBegin;
3849   /* Assign global numbering to coarse dofs */
3850   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3851     compute_vecs = PETSC_TRUE;
3852     PetscInt ocoarse_size;
3853     ocoarse_size = pcbddc->coarse_size;
3854     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3855     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3856     /* see if we can avoid some work */
3857     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3858       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3859         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3860         coarse_reuse = PETSC_FALSE;
3861       } else { /* we can safely reuse already computed coarse matrix */
3862         coarse_reuse = PETSC_TRUE;
3863       }
3864     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3865       coarse_reuse = PETSC_FALSE;
3866     }
3867     /* reset any subassembling information */
3868     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3869     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3870   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3871     coarse_reuse = PETSC_TRUE;
3872   }
3873 
3874   /* count "active" (i.e. with positive local size) and "void" processes */
3875   im_active = !!(pcis->n);
3876   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3877   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3878   void_procs = all_procs-active_procs;
3879   csin_type_simple = PETSC_TRUE;
3880   redist = PETSC_FALSE;
3881   if (pcbddc->current_level && void_procs) {
3882     csin_ml = PETSC_TRUE;
3883     ncoarse_ml = void_procs;
3884     csin_ds = PETSC_TRUE;
3885     ncoarse_ds = void_procs;
3886   } else {
3887     csin_ml = PETSC_FALSE;
3888     ncoarse_ml = all_procs;
3889     if (void_procs) {
3890       csin_ds = PETSC_TRUE;
3891       ncoarse_ds = void_procs;
3892       csin_type_simple = PETSC_FALSE;
3893     } else {
3894       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3895         csin_ds = PETSC_TRUE;
3896         ncoarse_ds = pcbddc->redistribute_coarse;
3897         redist = PETSC_TRUE;
3898       } else {
3899         csin_ds = PETSC_FALSE;
3900         ncoarse_ds = all_procs;
3901       }
3902     }
3903   }
3904 
3905   /*
3906     test if we can go multilevel: three conditions must be satisfied:
3907     - we have not exceeded the number of levels requested
3908     - we can actually subassemble the active processes
3909     - we can find a suitable number of MPI processes where we can place the subassembled problem
3910   */
3911   multilevel_allowed = PETSC_FALSE;
3912   multilevel_requested = PETSC_FALSE;
3913   if (pcbddc->current_level < pcbddc->max_levels) {
3914     multilevel_requested = PETSC_TRUE;
3915     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3916       multilevel_allowed = PETSC_FALSE;
3917     } else {
3918       multilevel_allowed = PETSC_TRUE;
3919     }
3920   }
3921   /* determine number of process partecipating to coarse solver */
3922   if (multilevel_allowed) {
3923     ncoarse = ncoarse_ml;
3924     csin = csin_ml;
3925   } else {
3926     ncoarse = ncoarse_ds;
3927     csin = csin_ds;
3928   }
3929 
3930   /* creates temporary l2gmap and IS for coarse indexes */
3931   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3932   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3933 
3934   /* creates temporary MATIS object for coarse matrix */
3935   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3936   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3937   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3938   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3939 #if 0
3940   {
3941     PetscViewer viewer;
3942     char filename[256];
3943     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3944     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3945     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3946     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3947     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3948   }
3949 #endif
3950   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3951   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3952   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3955 
3956   /* compute dofs splitting and neumann boundaries for coarse dofs */
3957   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3958     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3959     const PetscInt         *idxs;
3960     ISLocalToGlobalMapping tmap;
3961 
3962     /* create map between primal indices (in local representative ordering) and local primal numbering */
3963     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3964     /* allocate space for temporary storage */
3965     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3966     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3967     /* allocate for IS array */
3968     nisdofs = pcbddc->n_ISForDofsLocal;
3969     nisneu = !!pcbddc->NeumannBoundariesLocal;
3970     nis = nisdofs + nisneu;
3971     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3972     /* dofs splitting */
3973     for (i=0;i<nisdofs;i++) {
3974       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3975       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3976       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3977       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3978       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3979       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3980       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3981       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3982     }
3983     /* neumann boundaries */
3984     if (pcbddc->NeumannBoundariesLocal) {
3985       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3986       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3987       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3988       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3989       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3990       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3991       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3992       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3993     }
3994     /* free memory */
3995     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3996     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3997     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3998   } else {
3999     nis = 0;
4000     nisdofs = 0;
4001     nisneu = 0;
4002     isarray = NULL;
4003   }
4004   /* destroy no longer needed map */
4005   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4006 
4007   /* restrict on coarse candidates (if needed) */
4008   coarse_mat_is = NULL;
4009   if (csin) {
4010     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4011       if (redist) {
4012         PetscMPIInt rank;
4013         PetscInt spc,n_spc_p1,dest[1];
4014 
4015         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4016         spc = all_procs/pcbddc->redistribute_coarse;
4017         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4018         if (rank > n_spc_p1*(spc+1)-1) {
4019           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4020         } else {
4021           dest[0] = rank/(spc+1);
4022         }
4023         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4024       } else {
4025         PetscInt j,tissize,*nisindices;
4026         PetscInt *coarse_candidates;
4027         const PetscInt* tisindices;
4028         /* get coarse candidates' ranks in pc communicator */
4029         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4030         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4031         for (i=0,j=0;i<all_procs;i++) {
4032           if (!coarse_candidates[i]) {
4033             coarse_candidates[j]=i;
4034             j++;
4035           }
4036         }
4037         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4038         /* get a suitable subassembling pattern */
4039         if (csin_type_simple) {
4040           PetscMPIInt rank;
4041           PetscInt    issize,isidx;
4042           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4043           if (im_active) {
4044             issize = 1;
4045             isidx = (PetscInt)rank;
4046           } else {
4047             issize = 0;
4048             isidx = -1;
4049           }
4050           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4051         } else {
4052           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4053         }
4054         if (pcbddc->dbg_flag) {
4055           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4056           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4057           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4058           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4059           for (i=0;i<j;i++) {
4060             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4061           }
4062           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4063           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4064         }
4065         /* shift the pattern on coarse candidates */
4066         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4067         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4068         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4069         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4070         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4071         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4072         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4073       }
4074     }
4075     if (pcbddc->dbg_flag) {
4076       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4077       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4078       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4079       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4080     }
4081     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4082     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4083   } else {
4084     if (pcbddc->dbg_flag) {
4085       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4086       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4087       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4088     }
4089     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4090     coarse_mat_is = t_coarse_mat_is;
4091   }
4092 
4093   /* create local to global scatters for coarse problem */
4094   if (compute_vecs) {
4095     PetscInt lrows;
4096     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4097     if (coarse_mat_is) {
4098       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4099     } else {
4100       lrows = 0;
4101     }
4102     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4103     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4104     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4105     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4106     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4107   }
4108   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4109   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4110 
4111   /* set defaults for coarse KSP and PC */
4112   if (multilevel_allowed) {
4113     coarse_ksp_type = KSPRICHARDSON;
4114     coarse_pc_type = PCBDDC;
4115   } else {
4116     coarse_ksp_type = KSPPREONLY;
4117     coarse_pc_type = PCREDUNDANT;
4118   }
4119 
4120   /* print some info if requested */
4121   if (pcbddc->dbg_flag) {
4122     if (!multilevel_allowed) {
4123       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4124       if (multilevel_requested) {
4125         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4126       } else if (pcbddc->max_levels) {
4127         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4128       }
4129       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4130     }
4131   }
4132 
4133   /* create the coarse KSP object only once with defaults */
4134   if (coarse_mat_is) {
4135     MatReuse coarse_mat_reuse;
4136     PetscViewer dbg_viewer = NULL;
4137     if (pcbddc->dbg_flag) {
4138       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4139       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4140     }
4141     if (!pcbddc->coarse_ksp) {
4142       char prefix[256],str_level[16];
4143       size_t len;
4144       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4145       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4146       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4147       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4148       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4149       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4150       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4151       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4152       /* prefix */
4153       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4154       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4155       if (!pcbddc->current_level) {
4156         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4157         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4158       } else {
4159         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4160         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4161         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4162         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4163         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4164         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4165       }
4166       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4167       /* allow user customization */
4168       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4169       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4170     }
4171 
4172     /* get some info after set from options */
4173     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4174     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4175     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4176     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4177     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4178       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4179       isbddc = PETSC_FALSE;
4180     }
4181     if (isredundant) {
4182       KSP inner_ksp;
4183       PC inner_pc;
4184       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4185       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4186       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4187     }
4188 
4189     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4190     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4191     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4192     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4193     if (nisdofs) {
4194       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4195       for (i=0;i<nisdofs;i++) {
4196         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4197       }
4198     }
4199     if (nisneu) {
4200       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4201       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4202     }
4203 
4204     /* assemble coarse matrix */
4205     if (coarse_reuse) {
4206       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4207       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4208       coarse_mat_reuse = MAT_REUSE_MATRIX;
4209     } else {
4210       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4211     }
4212     if (isbddc || isnn) {
4213       if (pcbddc->coarsening_ratio > 1) {
4214         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4215           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4216           if (pcbddc->dbg_flag) {
4217             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4218             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4219             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4220             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4221           }
4222         }
4223         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4224       } else {
4225         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4226         coarse_mat = coarse_mat_is;
4227       }
4228     } else {
4229       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4230     }
4231     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4232 
4233     /* propagate symmetry info to coarse matrix */
4234     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4235     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4236 
4237     /* set operators */
4238     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4239     if (pcbddc->dbg_flag) {
4240       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4241     }
4242   } else { /* processes non partecipating to coarse solver (if any) */
4243     coarse_mat = 0;
4244   }
4245   ierr = PetscFree(isarray);CHKERRQ(ierr);
4246 #if 0
4247   {
4248     PetscViewer viewer;
4249     char filename[256];
4250     sprintf(filename,"coarse_mat.m");
4251     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4252     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4253     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4254     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4255   }
4256 #endif
4257 
4258   /* Compute coarse null space (special handling by BDDC only) */
4259   if (pcbddc->NullSpace) {
4260     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4261   }
4262 
4263   if (pcbddc->coarse_ksp) {
4264     Vec crhs,csol;
4265     PetscBool ispreonly;
4266     if (CoarseNullSpace) {
4267       if (isbddc) {
4268         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4269       } else {
4270         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4271       }
4272     }
4273     /* setup coarse ksp */
4274     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4275     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4276     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4277     /* hack */
4278     if (!csol) {
4279       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4280     }
4281     if (!crhs) {
4282       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4283     }
4284     /* Check coarse problem if in debug mode or if solving with an iterative method */
4285     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4286     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4287       KSP       check_ksp;
4288       KSPType   check_ksp_type;
4289       PC        check_pc;
4290       Vec       check_vec,coarse_vec;
4291       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4292       PetscInt  its;
4293       PetscBool compute_eigs;
4294       PetscReal *eigs_r,*eigs_c;
4295       PetscInt  neigs;
4296       const char *prefix;
4297 
4298       /* Create ksp object suitable for estimation of extreme eigenvalues */
4299       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4300       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4301       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4302       if (ispreonly) {
4303         check_ksp_type = KSPPREONLY;
4304         compute_eigs = PETSC_FALSE;
4305       } else {
4306         check_ksp_type = KSPGMRES;
4307         compute_eigs = PETSC_TRUE;
4308       }
4309       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4310       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4311       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4312       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4313       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4314       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4315       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4316       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4317       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4318       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4319       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4320       /* create random vec */
4321       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4322       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4323       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4324       if (CoarseNullSpace) {
4325         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4326       }
4327       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4328       /* solve coarse problem */
4329       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4330       if (CoarseNullSpace) {
4331         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4332       }
4333       /* set eigenvalue estimation if preonly has not been requested */
4334       if (compute_eigs) {
4335         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4336         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4337         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4338         lambda_max = eigs_r[neigs-1];
4339         lambda_min = eigs_r[0];
4340         if (pcbddc->use_coarse_estimates) {
4341           if (lambda_max>lambda_min) {
4342             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4343             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4344           }
4345         }
4346       }
4347 
4348       /* check coarse problem residual error */
4349       if (pcbddc->dbg_flag) {
4350         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4351         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4352         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4353         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4354         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4355         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4356         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4357         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4358         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4359         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4360         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4361         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4362         if (compute_eigs) {
4363           PetscReal lambda_max_s,lambda_min_s;
4364           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4365           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4366           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4367           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4368           for (i=0;i<neigs;i++) {
4369             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4370           }
4371         }
4372         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4373         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4374       }
4375       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4376       if (compute_eigs) {
4377         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4378         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4379       }
4380     }
4381   }
4382   /* print additional info */
4383   if (pcbddc->dbg_flag) {
4384     /* waits until all processes reaches this point */
4385     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4386     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4387     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4388   }
4389 
4390   /* free memory */
4391   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4392   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4393   PetscFunctionReturn(0);
4394 }
4395 
4396 #undef __FUNCT__
4397 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4398 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4399 {
4400   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4401   PC_IS*         pcis = (PC_IS*)pc->data;
4402   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4403   PetscInt       i,coarse_size;
4404   PetscInt       *local_primal_indices;
4405   PetscErrorCode ierr;
4406 
4407   PetscFunctionBegin;
4408   /* Compute global number of coarse dofs */
4409   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4410     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4411   }
4412   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4413 
4414   /* check numbering */
4415   if (pcbddc->dbg_flag) {
4416     PetscScalar coarsesum,*array;
4417     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4418 
4419     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4420     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4421     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4422     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4423     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4424     for (i=0;i<pcbddc->local_primal_size;i++) {
4425       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4426     }
4427     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4428     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4429     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4430     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4431     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4432     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4433     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4434     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4435     for (i=0;i<pcis->n;i++) {
4436       if (array[i] == 1.0) {
4437         set_error = PETSC_TRUE;
4438         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4439       }
4440     }
4441     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4442     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4443     for (i=0;i<pcis->n;i++) {
4444       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4445     }
4446     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4447     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4448     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4449     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4450     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4451     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4452     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4453       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4454       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4455       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4456       for (i=0;i<pcbddc->local_primal_size;i++) {
4457         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4458       }
4459       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4460     }
4461     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4462     if (set_error_reduced) {
4463       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4464     }
4465   }
4466   /* get back data */
4467   *coarse_size_n = coarse_size;
4468   *local_primal_indices_n = local_primal_indices;
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 #undef __FUNCT__
4473 #define __FUNCT__ "PCBDDCGlobalToLocal"
4474 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4475 {
4476   IS             localis_t;
4477   PetscInt       i,lsize,*idxs,n;
4478   PetscScalar    *vals;
4479   PetscErrorCode ierr;
4480 
4481   PetscFunctionBegin;
4482   /* get indices in local ordering exploiting local to global map */
4483   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4484   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4485   for (i=0;i<lsize;i++) vals[i] = 1.0;
4486   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4487   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4488   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4489   if (idxs) { /* multilevel guard */
4490     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4491   }
4492   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4493   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4494   ierr = PetscFree(vals);CHKERRQ(ierr);
4495   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4496   /* now compute set in local ordering */
4497   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4498   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4499   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4500   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4501   for (i=0,lsize=0;i<n;i++) {
4502     if (PetscRealPart(vals[i]) > 0.5) {
4503       lsize++;
4504     }
4505   }
4506   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4507   for (i=0,lsize=0;i<n;i++) {
4508     if (PetscRealPart(vals[i]) > 0.5) {
4509       idxs[lsize++] = i;
4510     }
4511   }
4512   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4513   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4514   *localis = localis_t;
4515   PetscFunctionReturn(0);
4516 }
4517 
4518 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4519 #undef __FUNCT__
4520 #define __FUNCT__ "PCBDDCMatMult_Private"
4521 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4522 {
4523   PCBDDCChange_ctx change_ctx;
4524   PetscErrorCode   ierr;
4525 
4526   PetscFunctionBegin;
4527   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4528   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4529   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4530   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4531   PetscFunctionReturn(0);
4532 }
4533 
4534 #undef __FUNCT__
4535 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4536 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4537 {
4538   PCBDDCChange_ctx change_ctx;
4539   PetscErrorCode   ierr;
4540 
4541   PetscFunctionBegin;
4542   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4543   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4544   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4545   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4546   PetscFunctionReturn(0);
4547 }
4548 
4549 #undef __FUNCT__
4550 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4551 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4552 {
4553   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4554   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4555   PetscInt            *used_xadj,*used_adjncy;
4556   PetscBool           free_used_adj;
4557   PetscErrorCode      ierr;
4558 
4559   PetscFunctionBegin;
4560   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4561   free_used_adj = PETSC_FALSE;
4562   if (pcbddc->sub_schurs_layers == -1) {
4563     used_xadj = NULL;
4564     used_adjncy = NULL;
4565   } else {
4566     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4567       used_xadj = pcbddc->mat_graph->xadj;
4568       used_adjncy = pcbddc->mat_graph->adjncy;
4569     } else if (pcbddc->computed_rowadj) {
4570       used_xadj = pcbddc->mat_graph->xadj;
4571       used_adjncy = pcbddc->mat_graph->adjncy;
4572     } else {
4573       Mat            mat_adj;
4574       PetscBool      flg_row=PETSC_TRUE;
4575       const PetscInt *xadj,*adjncy;
4576       PetscInt       nvtxs;
4577 
4578       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4579       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4580       if (!flg_row) {
4581         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4582       }
4583       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4584       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4585       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4586       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4587       if (!flg_row) {
4588         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4589       }
4590       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4591       free_used_adj = PETSC_TRUE;
4592     }
4593   }
4594   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4595 
4596   /* free adjacency */
4597   if (free_used_adj) {
4598     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4599   }
4600   PetscFunctionReturn(0);
4601 }
4602 
4603 #undef __FUNCT__
4604 #define __FUNCT__ "PCBDDCInitSubSchurs"
4605 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4606 {
4607   PC_IS               *pcis=(PC_IS*)pc->data;
4608   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4609   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4610   PCBDDCGraph         graph;
4611   Mat                 S_j;
4612   PetscErrorCode      ierr;
4613 
4614   PetscFunctionBegin;
4615   /* attach interface graph for determining subsets */
4616   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4617     IS verticesIS;
4618 
4619     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4620     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4621     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4622     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4623     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4624     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4625 /*
4626     if (pcbddc->dbg_flag) {
4627       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4628     }
4629 */
4630   } else {
4631     graph = pcbddc->mat_graph;
4632   }
4633 
4634   /* Create Schur complement matrix */
4635   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4636   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4637 
4638   /* sub_schurs init */
4639   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4640   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4641   /* free graph struct */
4642   if (pcbddc->sub_schurs_rebuild) {
4643     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4644   }
4645   PetscFunctionReturn(0);
4646 }
4647