xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 5408967cf80e11a7f7b1797e0b68955f747486b2)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignCheck"
10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PC_IS*         pcis = (PC_IS*)(pc->data);
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   if (zerodiag) {
18     Mat            A;
19     Vec            vec3_N;
20     IS             dirIS = NULL;
21     PetscScalar    *vals;
22     const PetscInt *idxs;
23     PetscInt       i,nz;
24 
25     /* p0 */
26     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
27     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
28     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
29     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
30     for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */
31     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
32     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
33     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
34     /* v_I */
35     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
36     for (i=0;i<nz;i++) vals[i] = 0.;
37     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
38     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
39     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
40     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
41     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
42     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
43     ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
44     if (dirIS) {
45       PetscInt n;
46 
47       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
48       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
49       for (i=0;i<n;i++) vals[i] = 0.;
50       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
51       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
52     }
53     ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
54     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
55     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
56     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
57     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
58     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
59     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
60     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
61     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */
62       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
63     }
64     ierr = PetscFree(vals);CHKERRQ(ierr);
65     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
66   }
67 
68   /* check PCBDDCBenignGetOrSetP0 */
69   if (pcbddc->benign_saddle_point) {
70     ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
71     pcbddc->benign_p0 = -PetscGlobalRank;
72     ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
73     pcbddc->benign_p0 = 1;
74     ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
75     if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) {
76       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr);
77     }
78   }
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
84 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
85 {
86   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
87   IS             zerodiag;
88   PetscInt       nz;
89   PetscBool      sorted;
90   PetscErrorCode ierr;
91 
92   PetscFunctionBegin;
93   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
94   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
95   pcbddc->benign_original_mat = pcbddc->local_mat;
96   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
97   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
98   if (!sorted) {
99     ierr = ISSort(zerodiag);CHKERRQ(ierr);
100   }
101   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
102   if (nz) {
103     IS                zerodiagc;
104     PetscScalar       *array;
105     const PetscInt    *idxs,*idxsc;
106     PetscInt          i,n,*nnz;
107 
108     /* TODO: add check for shared dofs and raise error */
109     ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
110     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
111     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
112     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
113     /* local change of basis for pressures */
114     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
115     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
116     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
117     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
118     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
119     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */
120     for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */
121     nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */
122     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
123     ierr = PetscFree(nnz);CHKERRQ(ierr);
124     /* set identity on velocities */
125     for (i=0;i<n-nz;i++) {
126       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
127     }
128     /* set change on pressures */
129     for (i=0;i<nz-1;i++) {
130       PetscScalar vals[2];
131       PetscInt    cols[2];
132 
133       /* TODO: add quadrature */
134       cols[0] = idxs[i];
135       cols[1] = idxs[nz-1];
136       vals[0] = 1.;
137       vals[1] = 1./nz;
138       ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
139     }
140     ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr);
141     for (i=0;i<nz-1;i++) array[i] = -1.;
142     array[nz-1] = 1./nz;
143     ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
144     ierr = PetscFree(array);CHKERRQ(ierr);
145     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
146     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
147     /* TODO: need optimization? */
148     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
149     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
150     /* store local and global idxs for p0 */
151     pcbddc->benign_p0_lidx = idxs[nz-1];
152     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
153     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
154     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
155     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
156   } else { /* this is unlikely to happen but, just in case, destroy the empty IS */
157     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
158   }
159   *zerodiaglocal = zerodiag;
160   PetscFunctionReturn(0);
161 }
162 
163 #undef __FUNCT__
164 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
165 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
166 {
167   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
168   PetscErrorCode ierr;
169 
170   PetscFunctionBegin;
171   if (!pcbddc->benign_sf) {
172     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
173     if (pcbddc->benign_p0_gidx >= 0) {
174       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
175     } else {
176       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
177     }
178   }
179   if (get) { /* use SF to get values */
180     PetscScalar *array;
181 
182     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
183     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
184     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
185     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
186   } else { /* use VecSetValue */
187     if (pcbddc->benign_p0_gidx >= 0) {
188       ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
189     }
190     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
191     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 #undef __FUNCT__
197 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
198 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
199 {
200   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
201   PetscErrorCode ierr;
202 
203   PetscFunctionBegin;
204   /* TODO: add error checking
205     - avoid nested pop (or push) calls.
206     - cannot push before pop.
207     - cannot call this if pcbddc->local_mat is NULL
208   */
209   if (pcbddc->benign_p0_lidx < 0) {
210     PetscFunctionReturn(0);
211   }
212   if (pop) {
213     const PetscInt    *cB0_cols;
214     PetscInt          cB0_ncol;
215     const PetscScalar *cB0_vals;
216 
217     /* extract B_0 */
218     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
219     pcbddc->B0_ncol = cB0_ncol;
220     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
221     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
222     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
223     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
224     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
225     /* remove rows and cols from local problem */
226     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
227     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
228   } else { /* push */
229     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
230     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
231     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
232     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PCBDDCAdaptiveSelection"
240 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
241 {
242   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
243   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
244   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
245   PetscBLASInt    *B_iwork,*B_ifail;
246   PetscScalar     *work,lwork;
247   PetscScalar     *St,*S,*eigv;
248   PetscScalar     *Sarray,*Starray;
249   PetscReal       *eigs,thresh;
250   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
251   PetscBool       allocated_S_St;
252 #if defined(PETSC_USE_COMPLEX)
253   PetscReal       *rwork;
254 #endif
255   PetscErrorCode  ierr;
256 
257   PetscFunctionBegin;
258   if (!sub_schurs->use_mumps) {
259     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
260   }
261 
262   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
263     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
264   }
265 
266   if (pcbddc->dbg_flag) {
267     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
268     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
269     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
270     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
271   }
272 
273   if (pcbddc->dbg_flag) {
274     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
275   }
276 
277   /* max size of subsets */
278   mss = 0;
279   for (i=0;i<sub_schurs->n_subs;i++) {
280     PetscInt subset_size;
281 
282     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
283     mss = PetscMax(mss,subset_size);
284   }
285 
286   /* min/max and threshold */
287   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
288   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
289   nmax = PetscMax(nmin,nmax);
290   allocated_S_St = PETSC_FALSE;
291   if (nmin) {
292     allocated_S_St = PETSC_TRUE;
293   }
294 
295   /* allocate lapack workspace */
296   cum = cum2 = 0;
297   maxneigs = 0;
298   for (i=0;i<sub_schurs->n_subs;i++) {
299     PetscInt n,subset_size;
300 
301     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
302     n = PetscMin(subset_size,nmax);
303     cum += subset_size;
304     cum2 += subset_size*n;
305     maxneigs = PetscMax(maxneigs,n);
306   }
307   if (mss) {
308     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
309       PetscBLASInt B_itype = 1;
310       PetscBLASInt B_N = mss;
311       PetscReal    zero = 0.0;
312       PetscReal    eps = 0.0; /* dlamch? */
313 
314       B_lwork = -1;
315       S = NULL;
316       St = NULL;
317       eigs = NULL;
318       eigv = NULL;
319       B_iwork = NULL;
320       B_ifail = NULL;
321 #if defined(PETSC_USE_COMPLEX)
322       rwork = NULL;
323 #endif
324       thresh = 1.0;
325       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
326 #if defined(PETSC_USE_COMPLEX)
327       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
328 #else
329       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
330 #endif
331       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
332       ierr = PetscFPTrapPop();CHKERRQ(ierr);
333     } else {
334         /* TODO */
335     }
336   } else {
337     lwork = 0;
338   }
339 
340   nv = 0;
341   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
342     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
343   }
344   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
345   if (allocated_S_St) {
346     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
347   }
348   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
349 #if defined(PETSC_USE_COMPLEX)
350   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
351 #endif
352   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
353                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
354                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
355                       nv+cum,&pcbddc->adaptive_constraints_idxs,
356                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
357   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
358 
359   maxneigs = 0;
360   cum = cum2 = cumarray = 0;
361   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
362   pcbddc->adaptive_constraints_data_ptr[0] = 0;
363   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
364     const PetscInt *idxs;
365 
366     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
367     for (cum=0;cum<nv;cum++) {
368       pcbddc->adaptive_constraints_n[cum] = 1;
369       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
370       pcbddc->adaptive_constraints_data[cum] = 1.0;
371       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
372       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
373     }
374     cum2 = cum;
375     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
376   }
377 
378   if (mss) { /* multilevel */
379     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
380     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
381   }
382 
383   for (i=0;i<sub_schurs->n_subs;i++) {
384 
385     const PetscInt *idxs;
386     PetscReal      infty = PETSC_MAX_REAL;
387     PetscInt       j,subset_size,eigs_start = 0;
388     PetscBLASInt   B_N;
389     PetscBool      same_data = PETSC_FALSE;
390 
391     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
392     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
393     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
394       if (sub_schurs->is_hermitian) {
395         PetscInt j,k;
396         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
397           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
398           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
399         }
400         for (j=0;j<subset_size;j++) {
401           for (k=j;k<subset_size;k++) {
402             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
403             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
404           }
405         }
406       } else {
407         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
408         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
409       }
410     } else {
411       S = Sarray + cumarray;
412       St = Starray + cumarray;
413     }
414 
415     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
416     /* see if we can save some work */
417     if (sub_schurs->n_subs == 1) {
418       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
419     }
420 
421     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
422       B_neigs = 0;
423     } else {
424       /* Threshold: this is an heuristic for edges */
425       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
426 
427       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
428         PetscBLASInt B_itype = 1;
429         PetscBLASInt B_IL, B_IU;
430         PetscReal    eps = -1.0; /* dlamch? */
431         PetscInt     nmin_s;
432 
433         if (pcbddc->dbg_flag) {
434           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
435         }
436 
437         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
438         if (thresh > 1.+PETSC_SMALL) {
439 
440           /* ask for eigenvalues larger than thresh */
441 #if defined(PETSC_USE_COMPLEX)
442           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
443 #else
444           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
445 #endif
446         } else {
447           B_IU = PetscMax(1,PetscMin(B_N,nmax));
448           B_IL = 1;
449 #if defined(PETSC_USE_COMPLEX)
450           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
451 #else
452           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
453 #endif
454         }
455         ierr = PetscFPTrapPop();CHKERRQ(ierr);
456         if (B_ierr) {
457           if (B_ierr < 0 ) {
458             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
459           } else if (B_ierr <= B_N) {
460             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
461           } else {
462             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
463           }
464         }
465 
466         if (B_neigs > nmax) {
467           if (pcbddc->dbg_flag) {
468             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
469           }
470           eigs_start = B_neigs -nmax;
471           B_neigs = nmax;
472         }
473 
474         nmin_s = PetscMin(nmin,B_N);
475         if (B_neigs < nmin_s) {
476           PetscBLASInt B_neigs2;
477 
478           B_IU = B_N - B_neigs;
479           B_IL = B_N - nmin_s + 1;
480           if (pcbddc->dbg_flag) {
481             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
482           }
483           if (sub_schurs->is_hermitian) {
484             PetscInt j;
485             for (j=0;j<subset_size;j++) {
486               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
487             }
488             for (j=0;j<subset_size;j++) {
489               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
490             }
491           } else {
492             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
493             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
494           }
495           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
496 #if defined(PETSC_USE_COMPLEX)
497           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
498 #else
499           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
500 #endif
501           ierr = PetscFPTrapPop();CHKERRQ(ierr);
502           B_neigs += B_neigs2;
503         }
504         if (B_ierr) {
505           if (B_ierr < 0 ) {
506             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
507           } else if (B_ierr <= B_N) {
508             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
509           } else {
510             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
511           }
512         }
513         if (pcbddc->dbg_flag) {
514           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
515           for (j=0;j<B_neigs;j++) {
516             if (eigs[j] == 0.0) {
517               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
518             } else {
519               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
520             }
521           }
522         }
523       } else {
524           /* TODO */
525       }
526     }
527     maxneigs = PetscMax(B_neigs,maxneigs);
528     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
529     if (B_neigs) {
530       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
531 
532       if (pcbddc->dbg_flag > 1) {
533         PetscInt ii;
534         for (ii=0;ii<B_neigs;ii++) {
535           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
536           for (j=0;j<B_N;j++) {
537 #if defined(PETSC_USE_COMPLEX)
538             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
539             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
540             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
541 #else
542             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
543 #endif
544           }
545         }
546       }
547 #if 0
548       for (j=0;j<B_neigs;j++) {
549         PetscBLASInt Blas_N,Blas_one = 1.0;
550         PetscScalar norm;
551         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
552         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
553                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
554         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
555           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
556         } else {
557           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
558         }
559         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
560       }
561 #endif
562       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
563       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
564       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
565       cum++;
566     }
567     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
568     /* shift for next computation */
569     cumarray += subset_size*subset_size;
570   }
571   if (pcbddc->dbg_flag) {
572     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
573   }
574 
575   if (mss) {
576     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
577     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
578     /* destroy matrices (junk) */
579     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
580     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
581   }
582   if (allocated_S_St) {
583     ierr = PetscFree2(S,St);CHKERRQ(ierr);
584   }
585   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
586 #if defined(PETSC_USE_COMPLEX)
587   ierr = PetscFree(rwork);CHKERRQ(ierr);
588 #endif
589   if (pcbddc->dbg_flag) {
590     PetscInt maxneigs_r;
591     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
592     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
593   }
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "PCBDDCSetUpSolvers"
599 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
600 {
601   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
602   PetscScalar    *coarse_submat_vals;
603   PetscErrorCode ierr;
604 
605   PetscFunctionBegin;
606   /* Setup local scatters R_to_B and (optionally) R_to_D */
607   /* PCBDDCSetUpLocalWorkVectors should be called first! */
608   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
609 
610   /* Setup local neumann solver ksp_R */
611   /* PCBDDCSetUpLocalScatters should be called first! */
612   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
613 
614   /* Change global null space passed in by the user if change of basis has been requested */
615   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
616     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
617   }
618 
619   /*
620      Setup local correction and local part of coarse basis.
621      Gives back the dense local part of the coarse matrix in column major ordering
622   */
623   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
624 
625   /* Compute total number of coarse nodes and setup coarse solver */
626   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
627 
628   /* free */
629   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "PCBDDCResetCustomization"
635 PetscErrorCode PCBDDCResetCustomization(PC pc)
636 {
637   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
638   PetscErrorCode ierr;
639 
640   PetscFunctionBegin;
641   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
642   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
643   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
644   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
645   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
646   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
647   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
648   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
649   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
650   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
651   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "PCBDDCResetTopography"
657 PetscErrorCode PCBDDCResetTopography(PC pc)
658 {
659   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
660   PetscErrorCode ierr;
661 
662   PetscFunctionBegin;
663   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
664   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
665   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
666   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
667   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "PCBDDCResetSolvers"
673 PetscErrorCode PCBDDCResetSolvers(PC pc)
674 {
675   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
676   PetscScalar    *array;
677   PetscErrorCode ierr;
678 
679   PetscFunctionBegin;
680   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
681   if (pcbddc->coarse_phi_B) {
682     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
683     ierr = PetscFree(array);CHKERRQ(ierr);
684   }
685   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
686   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
687   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
688   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
689   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
690   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
691   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
692   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
693   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
694   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
695   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
696   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
697   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
698   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
699   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
700   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
701   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
702   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
703   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
704   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
705   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
706   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
707   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
708   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
709   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
710   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
711   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
712   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
718 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
719 {
720   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
721   PC_IS          *pcis = (PC_IS*)pc->data;
722   VecType        impVecType;
723   PetscInt       n_constraints,n_R,old_size,n_benign;
724   PetscErrorCode ierr;
725 
726   PetscFunctionBegin;
727   if (!pcbddc->ConstraintMatrix) {
728     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
729   }
730   /* get sizes */
731   n_benign = 0;
732   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
733   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
734   n_R = pcis->n - pcbddc->n_vertices;
735   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
736   /* local work vectors (try to avoid unneeded work)*/
737   /* R nodes */
738   old_size = -1;
739   if (pcbddc->vec1_R) {
740     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
741   }
742   if (n_R != old_size) {
743     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
744     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
745     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
746     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
747     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
748     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
749   }
750   /* local primal dofs */
751   old_size = -1;
752   if (pcbddc->vec1_P) {
753     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
754   }
755   if (pcbddc->local_primal_size != old_size) {
756     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
757     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
758     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
759     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
760   }
761   /* local explicit constraints */
762   old_size = -1;
763   if (pcbddc->vec1_C) {
764     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
765   }
766   if (n_constraints && n_constraints != old_size) {
767     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
768     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
769     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
770     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "PCBDDCSetUpCorrection"
777 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
778 {
779   PetscErrorCode  ierr;
780   /* pointers to pcis and pcbddc */
781   PC_IS*          pcis = (PC_IS*)pc->data;
782   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
783   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
784   /* submatrices of local problem */
785   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
786   /* submatrices of benign trick */
787   Mat             B0_V = NULL;
788   /* submatrices of local coarse problem */
789   Mat             S_VV,S_CV,S_VC,S_CC;
790   /* working matrices */
791   Mat             C_CR;
792   /* additional working stuff */
793   PC              pc_R;
794   Mat             F,B0 = NULL;
795   PetscBool       isLU,isCHOL,isILU;
796 
797   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
798   PetscScalar     *work;
799   PetscInt        *idx_V_B;
800   PetscInt        n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0;
801   PetscInt        i,n_R,n_D,n_B;
802   PetscBool       unsymmetric_check;
803   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
804   MatType         impMatType;
805   /* some shortcuts to scalars */
806   PetscScalar     one=1.0,m_one=-1.0;
807 
808   PetscFunctionBegin;
809   n_benign = 0;
810   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
811   n_vertices = pcbddc->n_vertices;
812   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
813   /* Set Non-overlapping dimensions */
814   n_B = pcis->n_B;
815   n_D = pcis->n - n_B;
816   n_R = pcis->n - n_vertices;
817 
818   /* Set types for local objects needed by BDDC precondtioner */
819   impMatType = MATSEQDENSE;
820 
821   /* vertices in boundary numbering */
822   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
823   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
824   if (i != n_vertices) {
825     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
826   }
827 
828   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
829   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
830   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
831   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
832   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
833   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
834   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
835   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
836   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
837   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
838 
839   unsymmetric_check = PETSC_FALSE;
840   /* allocate workspace */
841   n = 0;
842   if (n_constraints) {
843     n += n_R*n_constraints;
844   }
845   if (n_vertices) {
846     n = PetscMax(2*n_R*n_vertices,n);
847     n = PetscMax((n_R+n_B)*n_vertices,n);
848   }
849   if (!pcbddc->symmetric_primal) {
850     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
851     unsymmetric_check = PETSC_TRUE;
852   }
853   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
854 
855   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
856   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
857   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
858   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
859   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
860   if (isLU || isILU || isCHOL) {
861     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
862   } else if (sub_schurs->reuse_mumps) {
863     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
864     MatFactorType type;
865 
866     F = reuse_mumps->F;
867     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
868     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
869   } else {
870     F = NULL;
871   }
872 
873   /* Precompute stuffs needed for preprocessing and application of BDDC*/
874   if (n_constraints) {
875     Mat         M1,M2,M3;
876     Mat         auxmat;
877     IS          is_aux;
878     PetscScalar *array,*array2;
879 
880     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
881     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
882 
883     /* Extract constraints on R nodes: C_{CR}  */
884     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
885     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
886     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
887 
888     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
889     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
890     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
891     for (i=0;i<n_constraints;i++) {
892       const PetscScalar *row_cmat_values;
893       const PetscInt    *row_cmat_indices;
894       PetscInt          size_of_constraint,j;
895 
896       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
897       for (j=0;j<size_of_constraint;j++) {
898         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
899       }
900       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
901     }
902     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
903     if (F) {
904       Mat B;
905 
906       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
907       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
908       ierr = MatDestroy(&B);CHKERRQ(ierr);
909     } else {
910       PetscScalar *marr;
911 
912       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
913       for (i=0;i<n_constraints;i++) {
914         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
915         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
916         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
917         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
919       }
920       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
921     }
922     if (!pcbddc->switch_static) {
923       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
924       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
925       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
926       for (i=0;i<n_constraints;i++) {
927         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
928         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
929         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
932         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
933       }
934       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
935       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
936       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
937     } else {
938       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
939       pcbddc->local_auxmat2 = local_auxmat2_R;
940       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
941     }
942     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
943     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
944     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
945     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
946     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
947     if (isCHOL) {
948       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
949     } else {
950       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
951     }
952     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
953     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
954     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
955     ierr = MatDestroy(&M2);CHKERRQ(ierr);
956     ierr = MatDestroy(&M3);CHKERRQ(ierr);
957     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
958     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
959     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
960     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
961     ierr = MatDestroy(&M1);CHKERRQ(ierr);
962   }
963   /* Get submatrices from subdomain matrix */
964   if (n_benign) {
965     IS        dummy;
966     Mat       B0_R;
967     PetscReal norm;
968     PetscInt  ii[2];
969 
970     ii[0] = 0;
971     ii[1] = pcbddc->B0_ncol;
972     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
973     ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
974     ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
975     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
976     if (norm > PETSC_SMALL) {
977       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
978     }
979     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
980     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
981   }
982 
983   if (n_vertices) {
984     IS is_aux;
985 
986     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
987       IS tis;
988 
989       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
990       ierr = ISSort(tis);CHKERRQ(ierr);
991       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
992       ierr = ISDestroy(&tis);CHKERRQ(ierr);
993     } else {
994       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
995     }
996     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
997     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
998     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
999     if (n_benign) {
1000       IS dummy;
1001 
1002       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
1003       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1004       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1005     }
1006     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1007   }
1008 
1009   /* Matrix of coarse basis functions (local) */
1010   if (pcbddc->coarse_phi_B) {
1011     PetscInt on_B,on_primal,on_D=n_D;
1012     if (pcbddc->coarse_phi_D) {
1013       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1014     }
1015     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1016     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1017       PetscScalar *marray;
1018 
1019       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1020       ierr = PetscFree(marray);CHKERRQ(ierr);
1021       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1022       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1023       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1024       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1025     }
1026   }
1027 
1028   if (!pcbddc->coarse_phi_B) {
1029     PetscScalar *marray;
1030 
1031     n = n_B*pcbddc->local_primal_size;
1032     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1033       n += n_D*pcbddc->local_primal_size;
1034     }
1035     if (!pcbddc->symmetric_primal) {
1036       n *= 2;
1037     }
1038     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1039     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1040     n = n_B*pcbddc->local_primal_size;
1041     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1042       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1043       n += n_D*pcbddc->local_primal_size;
1044     }
1045     if (!pcbddc->symmetric_primal) {
1046       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1047       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1048         n = n_B*pcbddc->local_primal_size;
1049         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1050       }
1051     } else {
1052       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1053       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1054       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1055         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1056         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1057       }
1058     }
1059   }
1060 
1061   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1062   if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1063     const PetscInt *idxs;
1064 
1065     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1066     ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr);
1067     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1068   }
1069 
1070   /* vertices */
1071   if (n_vertices) {
1072 
1073     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1074 
1075     if (n_R) {
1076       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1077       PetscBLASInt B_N,B_one = 1;
1078       PetscScalar  *x,*y;
1079       PetscBool    isseqaij;
1080 
1081       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1082       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1083       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1084       if (F) { /* TODO could be optimized for symmetric problems */
1085         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1086       } else {
1087         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1088         for (i=0;i<n_vertices;i++) {
1089           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1090           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1091           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1092           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1093           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1094         }
1095         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1096       }
1097       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1098       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1099       if (n_constraints) {
1100         Mat B;
1101 
1102         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1103         for (i=0;i<n_vertices;i++) {
1104           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1105           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1106           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1109           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1110         }
1111         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1112         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1113         ierr = MatDestroy(&B);CHKERRQ(ierr);
1114         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1115         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1116         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1117         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1118         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1119         ierr = MatDestroy(&B);CHKERRQ(ierr);
1120       }
1121       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1122       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1123         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1124       }
1125       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1126       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1127       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1128       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1129       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1130       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1131       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1132       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1133       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1134       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1135     } else {
1136       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1137     }
1138     if (n_benign) {
1139       const PetscScalar *vals;
1140       const PetscInt    *idxs;
1141       PetscInt          n,j;
1142 
1143       ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1144       for (j=0;j<n;j++) {
1145         coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j];
1146         coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j];
1147       }
1148       ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1149     }
1150     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1151 
1152     /* coarse basis functions */
1153     for (i=0;i<n_vertices;i++) {
1154       PetscScalar *y;
1155 
1156       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1157       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1158       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1159       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161       y[n_B*i+idx_V_B[i]] = 1.0;
1162       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1163       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1164 
1165       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1166         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1167         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1168         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1171         if (n_benign) y[n_D*i+p0_lidx_I] = 0.0;
1172         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1173       }
1174       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1175     }
1176     /* if n_R == 0 the object is not destroyed */
1177     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1178   }
1179 
1180   if (n_constraints) {
1181     Mat B;
1182 
1183     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1184     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1185     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1186     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1187     if (n_vertices) {
1188       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1189         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1190       } else {
1191         Mat S_VCt;
1192 
1193         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1194         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1195         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1196       }
1197     }
1198     ierr = MatDestroy(&B);CHKERRQ(ierr);
1199     /* coarse basis functions */
1200     for (i=0;i<n_constraints;i++) {
1201       PetscScalar *y;
1202 
1203       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1204       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1205       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1206       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1209       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1210       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1211         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1212         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1213         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1216         if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0;
1217         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1218       }
1219       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1220     }
1221   }
1222   if (n_constraints) {
1223     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1224   }
1225 
1226   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1227 
1228   /* compute other basis functions for non-symmetric problems */
1229   if (!pcbddc->symmetric_primal) {
1230 
1231     if (n_constraints) {
1232       Mat S_CCT,B_C;
1233 
1234       /* this is a lazy thing */
1235       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1236       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1237       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1238       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1239       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1240       if (n_vertices) {
1241         Mat B_V,S_VCT;
1242 
1243         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1244         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1245         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1246         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1247         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1248       }
1249       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1250     } else { /* if there are no constraints, reset work */
1251       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1252     }
1253     if (n_vertices && n_R) {
1254       Mat          A_VRT;
1255       PetscScalar  *marray;
1256       PetscBLASInt B_N,B_one = 1;
1257 
1258       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1259       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1260       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1261       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1262       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1263       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1264       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1265     }
1266 
1267     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1268       for (i=0;i<pcbddc->local_primal_size;i++) {
1269         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1270         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1271         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1272         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1273         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1274       }
1275     } else {
1276       for (i=0;i<pcbddc->local_primal_size;i++) {
1277         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1278         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1279         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1280         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1281         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1282       }
1283     }
1284     /* coarse basis functions */
1285     for (i=0;i<pcbddc->local_primal_size;i++) {
1286       PetscScalar *y;
1287 
1288       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1289       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1290       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1291       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1292       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293       if (i<n_vertices) {
1294         y[n_B*i+idx_V_B[i]] = 1.0;
1295       }
1296       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1297       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1298 
1299       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1300         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1301         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1302         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1303         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1305         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1306       }
1307       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1308     }
1309   }
1310   /* free memory */
1311   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1312   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1313   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1314   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1315   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1316   ierr = PetscFree(work);CHKERRQ(ierr);
1317   if (n_vertices) {
1318     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1319   }
1320   if (n_constraints) {
1321     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1322   }
1323   /* Checking coarse_sub_mat and coarse basis functios */
1324   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1325   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1326   if (pcbddc->dbg_flag) {
1327     Mat         coarse_sub_mat;
1328     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1329     Mat         coarse_phi_D,coarse_phi_B;
1330     Mat         coarse_psi_D,coarse_psi_B;
1331     Mat         A_II,A_BB,A_IB,A_BI;
1332     Mat         C_B,CPHI;
1333     IS          is_dummy;
1334     Vec         mones;
1335     MatType     checkmattype=MATSEQAIJ;
1336     PetscReal   real_value;
1337 
1338     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1339     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1340     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1341     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1342     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1343     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1344     if (unsymmetric_check) {
1345       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1346       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1347     }
1348     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1349 
1350     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1351     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1352     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1353     if (unsymmetric_check) {
1354       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1355       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1356       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1357       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1358       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1359       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1360       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1361       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1362       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1363       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1364       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1365       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1366     } else {
1367       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1368       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1369       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1370       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1371       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1372       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1373       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1374       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1375     }
1376     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1377     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1378     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1379     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1380     if (n_benign) {
1381       Mat B0_I,B0_B,B0_BPHI,B0_IPHI;
1382       PetscScalar *data,*data2;
1383 
1384       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr);
1385       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1386       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1387       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1388       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1389       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1390       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1391       for (i=0;i<pcbddc->local_primal_size;i++) {
1392         data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i];
1393         data[(i+1)*pcbddc->local_primal_size-1] += data2[i];
1394       }
1395       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1396       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1397       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1398       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1399       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1400       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1401       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1402       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1403       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1404     }
1405 #if 0
1406   {
1407     PetscViewer viewer;
1408     char filename[256];
1409     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1410     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1411     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1412     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1413     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1414   }
1415 #endif
1416     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1417     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1418     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1419     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1420 
1421     /* check constraints */
1422     if (!n_benign) {
1423       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1424       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1425       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1426       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1427       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1428       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1429       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1430       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1431       if (unsymmetric_check) {
1432         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1433         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1434         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1435         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1436         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1437       }
1438       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1439       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1440       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1441       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1442     }
1443     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1444     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1445     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1446     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1447     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1448     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1449     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1450     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1451     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1452     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1453     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1454     if (unsymmetric_check) {
1455       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1456       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1457     }
1458     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1459   }
1460   ierr = MatDestroy(&B0);CHKERRQ(ierr);
1461   /* get back data */
1462   *coarse_submat_vals_n = coarse_submat_vals;
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1468 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1469 {
1470   Mat            *work_mat;
1471   IS             isrow_s,iscol_s;
1472   PetscBool      rsorted,csorted;
1473   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1474   PetscErrorCode ierr;
1475 
1476   PetscFunctionBegin;
1477   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1478   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1479   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1480   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1481 
1482   if (!rsorted) {
1483     const PetscInt *idxs;
1484     PetscInt *idxs_sorted,i;
1485 
1486     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1487     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1488     for (i=0;i<rsize;i++) {
1489       idxs_perm_r[i] = i;
1490     }
1491     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1492     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1493     for (i=0;i<rsize;i++) {
1494       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1495     }
1496     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1497     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1498   } else {
1499     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1500     isrow_s = isrow;
1501   }
1502 
1503   if (!csorted) {
1504     if (isrow == iscol) {
1505       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1506       iscol_s = isrow_s;
1507     } else {
1508       const PetscInt *idxs;
1509       PetscInt *idxs_sorted,i;
1510 
1511       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1512       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1513       for (i=0;i<csize;i++) {
1514         idxs_perm_c[i] = i;
1515       }
1516       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1517       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1518       for (i=0;i<csize;i++) {
1519         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1520       }
1521       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1522       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1523     }
1524   } else {
1525     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1526     iscol_s = iscol;
1527   }
1528 
1529   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1530 
1531   if (!rsorted || !csorted) {
1532     Mat      new_mat;
1533     IS       is_perm_r,is_perm_c;
1534 
1535     if (!rsorted) {
1536       PetscInt *idxs_r,i;
1537       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1538       for (i=0;i<rsize;i++) {
1539         idxs_r[idxs_perm_r[i]] = i;
1540       }
1541       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1542       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1543     } else {
1544       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1545     }
1546     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1547 
1548     if (!csorted) {
1549       if (isrow_s == iscol_s) {
1550         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1551         is_perm_c = is_perm_r;
1552       } else {
1553         PetscInt *idxs_c,i;
1554         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1555         for (i=0;i<csize;i++) {
1556           idxs_c[idxs_perm_c[i]] = i;
1557         }
1558         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1559         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1560       }
1561     } else {
1562       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1563     }
1564     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1565 
1566     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1567     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1568     work_mat[0] = new_mat;
1569     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1570     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1571   }
1572 
1573   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1574   *B = work_mat[0];
1575   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1576   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1577   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1583 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1584 {
1585   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1586   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1587   Mat            new_mat;
1588   IS             is_local,is_global;
1589   PetscInt       local_size;
1590   PetscBool      isseqaij;
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1595   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1596   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1597   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1598   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1599   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1600   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1601 
1602   /* check */
1603   if (pcbddc->dbg_flag) {
1604     Vec       x,x_change;
1605     PetscReal error;
1606 
1607     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1608     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1609     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1610     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1613     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1614     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1615     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1616     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1617     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1618     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1619     ierr = VecDestroy(&x);CHKERRQ(ierr);
1620     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1621   }
1622 
1623   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1624   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1625   if (isseqaij) {
1626     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1627   } else {
1628     Mat work_mat;
1629     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1630     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1631     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1632   }
1633   if (matis->A->symmetric_set) {
1634     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1635 #if !defined(PETSC_USE_COMPLEX)
1636     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1637 #endif
1638   }
1639   /*
1640   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1641   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1642   */
1643   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1649 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1650 {
1651   PC_IS*          pcis = (PC_IS*)(pc->data);
1652   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1653   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1654   PetscInt        *idx_R_local=NULL;
1655   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1656   PetscInt        vbs,bs;
1657   PetscBT         bitmask=NULL;
1658   PetscErrorCode  ierr;
1659 
1660   PetscFunctionBegin;
1661   /*
1662     No need to setup local scatters if
1663       - primal space is unchanged
1664         AND
1665       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1666         AND
1667       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1668   */
1669   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1670     PetscFunctionReturn(0);
1671   }
1672   /* destroy old objects */
1673   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1674   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1675   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1676   /* Set Non-overlapping dimensions */
1677   n_B = pcis->n_B;
1678   n_D = pcis->n - n_B;
1679   n_vertices = pcbddc->n_vertices;
1680 
1681   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1682 
1683   /* create auxiliary bitmask and allocate workspace */
1684   if (!sub_schurs->reuse_mumps) {
1685     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1686     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1687     for (i=0;i<n_vertices;i++) {
1688       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1689     }
1690 
1691     for (i=0, n_R=0; i<pcis->n; i++) {
1692       if (!PetscBTLookup(bitmask,i)) {
1693         idx_R_local[n_R++] = i;
1694       }
1695     }
1696   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1697     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1698 
1699     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1700     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1701   }
1702 
1703   /* Block code */
1704   vbs = 1;
1705   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1706   if (bs>1 && !(n_vertices%bs)) {
1707     PetscBool is_blocked = PETSC_TRUE;
1708     PetscInt  *vary;
1709     if (!sub_schurs->reuse_mumps) {
1710       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1711       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1712       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1713       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1714       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1715       for (i=0; i<pcis->n/bs; i++) {
1716         if (vary[i]!=0 && vary[i]!=bs) {
1717           is_blocked = PETSC_FALSE;
1718           break;
1719         }
1720       }
1721       ierr = PetscFree(vary);CHKERRQ(ierr);
1722     } else {
1723       /* Verify directly the R set */
1724       for (i=0; i<n_R/bs; i++) {
1725         PetscInt j,node=idx_R_local[bs*i];
1726         for (j=1; j<bs; j++) {
1727           if (node != idx_R_local[bs*i+j]-j) {
1728             is_blocked = PETSC_FALSE;
1729             break;
1730           }
1731         }
1732       }
1733     }
1734     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1735       vbs = bs;
1736       for (i=0;i<n_R/vbs;i++) {
1737         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1738       }
1739     }
1740   }
1741   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1742   if (sub_schurs->reuse_mumps) {
1743     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1744 
1745     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1746     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1747     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1748     reuse_mumps->is_R = pcbddc->is_R_local;
1749   } else {
1750     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1751   }
1752 
1753   /* print some info if requested */
1754   if (pcbddc->dbg_flag) {
1755     PetscInt benign = 0;
1756 
1757     if (pcbddc->benign_p0_lidx >= 0) benign = 1;
1758     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1759     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1760     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1761     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1762     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1763     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1764     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1765   }
1766 
1767   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1768   if (!sub_schurs->reuse_mumps) {
1769     IS       is_aux1,is_aux2;
1770     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1771 
1772     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1773     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1774     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1775     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1776     for (i=0; i<n_D; i++) {
1777       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1778     }
1779     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1780     for (i=0, j=0; i<n_R; i++) {
1781       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1782         aux_array1[j++] = i;
1783       }
1784     }
1785     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1786     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1787     for (i=0, j=0; i<n_B; i++) {
1788       if (!PetscBTLookup(bitmask,is_indices[i])) {
1789         aux_array2[j++] = i;
1790       }
1791     }
1792     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1793     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1794     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1795     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1796     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1797 
1798     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1799       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1800       for (i=0, j=0; i<n_R; i++) {
1801         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1802           aux_array1[j++] = i;
1803         }
1804       }
1805       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1806       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1807       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1808     }
1809     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1810     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1811   } else {
1812     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1813     IS               tis;
1814     PetscInt         schur_size;
1815 
1816     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1817     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1818     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1819     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1820     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1821       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1822       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1823       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1824     }
1825   }
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1832 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1833 {
1834   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1835   PC_IS          *pcis = (PC_IS*)pc->data;
1836   PC             pc_temp;
1837   Mat            A_RR;
1838   MatReuse       reuse;
1839   PetscScalar    m_one = -1.0;
1840   PetscReal      value;
1841   PetscInt       n_D,n_R;
1842   PetscBool      use_exact,use_exact_reduced,issbaij;
1843   PetscErrorCode ierr;
1844   /* prefixes stuff */
1845   char           dir_prefix[256],neu_prefix[256],str_level[16];
1846   size_t         len;
1847 
1848   PetscFunctionBegin;
1849 
1850   /* compute prefixes */
1851   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1852   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1853   if (!pcbddc->current_level) {
1854     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1855     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1856     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1857     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1858   } else {
1859     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1860     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1861     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1862     len -= 15; /* remove "pc_bddc_coarse_" */
1863     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1864     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1865     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1866     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1867     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1868     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1869     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1870     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1871   }
1872 
1873   /* DIRICHLET PROBLEM */
1874   if (dirichlet) {
1875     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1876     if (pcbddc->local_mat->symmetric_set) {
1877       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1878     }
1879     /* Matrix for Dirichlet problem is pcis->A_II */
1880     n_D = pcis->n - pcis->n_B;
1881     if (!pcbddc->ksp_D) { /* create object if not yet build */
1882       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1883       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1884       /* default */
1885       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1886       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1887       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1888       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1889       if (issbaij) {
1890         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1891       } else {
1892         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1893       }
1894       /* Allow user's customization */
1895       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1896       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1897     }
1898     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1899     if (sub_schurs->reuse_mumps) {
1900       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1901 
1902       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1903     }
1904     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1905     if (!n_D) {
1906       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1907       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1908     }
1909     /* Set Up KSP for Dirichlet problem of BDDC */
1910     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1911     /* set ksp_D into pcis data */
1912     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1913     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1914     pcis->ksp_D = pcbddc->ksp_D;
1915   }
1916 
1917   /* NEUMANN PROBLEM */
1918   A_RR = 0;
1919   if (neumann) {
1920     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1921     PetscInt        ibs,mbs;
1922     PetscBool       issbaij;
1923     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1924     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1925     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1926     if (pcbddc->ksp_R) { /* already created ksp */
1927       PetscInt nn_R;
1928       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1929       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1930       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1931       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1932         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1933         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1934         reuse = MAT_INITIAL_MATRIX;
1935       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1936         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1937           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1938           reuse = MAT_INITIAL_MATRIX;
1939         } else { /* safe to reuse the matrix */
1940           reuse = MAT_REUSE_MATRIX;
1941         }
1942       }
1943       /* last check */
1944       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1945         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1946         reuse = MAT_INITIAL_MATRIX;
1947       }
1948     } else { /* first time, so we need to create the matrix */
1949       reuse = MAT_INITIAL_MATRIX;
1950     }
1951     /* extract A_RR */
1952     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1953     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1954     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1955     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1956       if (matis->A == pcbddc->local_mat) {
1957         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1958         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1959       } else {
1960         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1961       }
1962     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1963       if (matis->A == pcbddc->local_mat) {
1964         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1965         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1966       } else {
1967         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1968       }
1969     }
1970     if (!sub_schurs->reuse_mumps) {
1971       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1972       if (pcbddc->local_mat->symmetric_set) {
1973         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1974       }
1975     } else {
1976       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1977 
1978       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1979       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1980       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1981     }
1982     if (!pcbddc->ksp_R) { /* create object if not present */
1983       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1984       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1985       /* default */
1986       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1987       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1988       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1989       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1990       if (issbaij) {
1991         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1992       } else {
1993         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1994       }
1995       /* Allow user's customization */
1996       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1997       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1998     }
1999     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2000     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2001     if (!n_R) {
2002       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2003       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2004     }
2005     /* Reuse MUMPS solver if it is present */
2006     if (sub_schurs->reuse_mumps) {
2007       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2008 
2009       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2010     }
2011     /* Set Up KSP for Neumann problem of BDDC */
2012     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2013   }
2014   /* free Neumann problem's matrix */
2015   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2016 
2017   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2018   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2019     if (pcbddc->dbg_flag) {
2020       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2021       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2022       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2023     }
2024     if (dirichlet) { /* Dirichlet */
2025       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2026       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2027       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2028       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2029       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2030       /* need to be adapted? */
2031       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2032       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2033       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2034       /* print info */
2035       if (pcbddc->dbg_flag) {
2036         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2037         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2038       }
2039       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2040         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2041       }
2042     }
2043     if (neumann) { /* Neumann */
2044       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2045       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2046       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2047       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2048       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2049       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2050       /* need to be adapted? */
2051       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2052       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2053       /* print info */
2054       if (pcbddc->dbg_flag) {
2055         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2056         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2057       }
2058       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2059         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2060       }
2061     }
2062   }
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2068 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2069 {
2070   PetscErrorCode  ierr;
2071   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2072   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2073 
2074   PetscFunctionBegin;
2075   if (!sub_schurs->reuse_mumps) {
2076     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2077   }
2078   if (!pcbddc->switch_static) {
2079     if (applytranspose && pcbddc->local_auxmat1) {
2080       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2081       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2082     }
2083     if (!sub_schurs->reuse_mumps) {
2084       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2085       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2086     } else {
2087       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2088 
2089       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2091     }
2092   } else {
2093     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2094     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2095     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2096     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2097     if (applytranspose && pcbddc->local_auxmat1) {
2098       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2099       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2100       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2101       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2102     }
2103   }
2104   if (!sub_schurs->reuse_mumps) {
2105     if (applytranspose) {
2106       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2107     } else {
2108       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2109     }
2110 #if defined(PETSC_HAVE_MUMPS)
2111   } else {
2112     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2113 
2114     if (applytranspose) {
2115       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2116     } else {
2117       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2118     }
2119 #endif
2120   }
2121   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2122   if (!pcbddc->switch_static) {
2123     if (!sub_schurs->reuse_mumps) {
2124       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2125       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2126     } else {
2127       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2128 
2129       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2130       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2131     }
2132     if (!applytranspose && pcbddc->local_auxmat1) {
2133       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2134       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2135     }
2136   } else {
2137     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2138     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2139     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2140     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2141     if (!applytranspose && pcbddc->local_auxmat1) {
2142       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2143       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2144     }
2145     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2146     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2147     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2148     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2154 #undef __FUNCT__
2155 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2156 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2157 {
2158   PetscErrorCode ierr;
2159   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2160   PC_IS*            pcis = (PC_IS*)  (pc->data);
2161   const PetscScalar zero = 0.0;
2162 
2163   PetscFunctionBegin;
2164   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2165   if (applytranspose) {
2166     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2167     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2168   } else {
2169     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2170     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2171   }
2172 
2173   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2174   if (pcbddc->benign_p0_lidx >= 0) {
2175     PetscScalar *array;
2176 
2177     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2178     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2179     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2180   }
2181 
2182   /* start communications from local primal nodes to rhs of coarse solver */
2183   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2184   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2185   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2186 
2187   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2188   /* TODO remove null space when doing multilevel */
2189   if (pcbddc->coarse_ksp) {
2190     Vec rhs,sol;
2191 
2192     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2193     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2194     if (applytranspose) {
2195       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2196     } else {
2197       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2198     }
2199   }
2200 
2201   /* Local solution on R nodes */
2202   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2203     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2204   }
2205 
2206   /* communications from coarse sol to local primal nodes */
2207   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2208   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2209 
2210   /* Sum contributions from two levels */
2211   if (applytranspose) {
2212     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2213     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2214   } else {
2215     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2216     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2217   }
2218   /* store p0 */
2219   if (pcbddc->benign_p0_lidx >= 0) {
2220     PetscScalar *array;
2221 
2222     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2223     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2224     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2231 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2232 {
2233   PetscErrorCode ierr;
2234   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2235   PetscScalar    *array;
2236   Vec            from,to;
2237 
2238   PetscFunctionBegin;
2239   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2240     from = pcbddc->coarse_vec;
2241     to = pcbddc->vec1_P;
2242     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2243       Vec tvec;
2244 
2245       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2246       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2247       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2248       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2249       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2250       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2251     }
2252   } else { /* from local to global -> put data in coarse right hand side */
2253     from = pcbddc->vec1_P;
2254     to = pcbddc->coarse_vec;
2255   }
2256   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2262 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2263 {
2264   PetscErrorCode ierr;
2265   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2266   PetscScalar    *array;
2267   Vec            from,to;
2268 
2269   PetscFunctionBegin;
2270   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2271     from = pcbddc->coarse_vec;
2272     to = pcbddc->vec1_P;
2273   } else { /* from local to global -> put data in coarse right hand side */
2274     from = pcbddc->vec1_P;
2275     to = pcbddc->coarse_vec;
2276   }
2277   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2278   if (smode == SCATTER_FORWARD) {
2279     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2280       Vec tvec;
2281 
2282       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2283       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2284       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2285       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2286     }
2287   } else {
2288     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2289      ierr = VecResetArray(from);CHKERRQ(ierr);
2290     }
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 /* uncomment for testing purposes */
2296 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2297 #undef __FUNCT__
2298 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2299 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2300 {
2301   PetscErrorCode    ierr;
2302   PC_IS*            pcis = (PC_IS*)(pc->data);
2303   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2304   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2305   /* one and zero */
2306   PetscScalar       one=1.0,zero=0.0;
2307   /* space to store constraints and their local indices */
2308   PetscScalar       *constraints_data;
2309   PetscInt          *constraints_idxs,*constraints_idxs_B;
2310   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2311   PetscInt          *constraints_n;
2312   /* iterators */
2313   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2314   /* BLAS integers */
2315   PetscBLASInt      lwork,lierr;
2316   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2317   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2318   /* reuse */
2319   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2320   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2321   /* change of basis */
2322   PetscBool         qr_needed;
2323   PetscBT           change_basis,qr_needed_idx;
2324   /* auxiliary stuff */
2325   PetscInt          *nnz,*is_indices;
2326   PetscInt          ncc;
2327   /* some quantities */
2328   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2329   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2330 
2331   PetscFunctionBegin;
2332   /* Destroy Mat objects computed previously */
2333   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2334   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2335   /* save info on constraints from previous setup (if any) */
2336   olocal_primal_size = pcbddc->local_primal_size;
2337   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2338   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2339   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2340   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2341   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2342   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2343 
2344   /* print some info */
2345   if (pcbddc->dbg_flag) {
2346     IS       vertices;
2347     PetscInt nv,nedges,nfaces;
2348     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2349     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2350     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2351     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2352     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2353     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2354     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2355     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2356     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2357   }
2358 
2359   if (!pcbddc->adaptive_selection) {
2360     IS           ISForVertices,*ISForFaces,*ISForEdges;
2361     MatNullSpace nearnullsp;
2362     const Vec    *nearnullvecs;
2363     Vec          *localnearnullsp;
2364     PetscScalar  *array;
2365     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2366     PetscBool    nnsp_has_cnst;
2367     /* LAPACK working arrays for SVD or POD */
2368     PetscBool    skip_lapack,boolforchange;
2369     PetscScalar  *work;
2370     PetscReal    *singular_vals;
2371 #if defined(PETSC_USE_COMPLEX)
2372     PetscReal    *rwork;
2373 #endif
2374 #if defined(PETSC_MISSING_LAPACK_GESVD)
2375     PetscScalar  *temp_basis,*correlation_mat;
2376 #else
2377     PetscBLASInt dummy_int=1;
2378     PetscScalar  dummy_scalar=1.;
2379 #endif
2380 
2381     /* Get index sets for faces, edges and vertices from graph */
2382     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2383     /* free unneeded index sets */
2384     if (!pcbddc->use_vertices) {
2385       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2386     }
2387     if (!pcbddc->use_edges) {
2388       for (i=0;i<n_ISForEdges;i++) {
2389         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2390       }
2391       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2392       n_ISForEdges = 0;
2393     }
2394     if (!pcbddc->use_faces) {
2395       for (i=0;i<n_ISForFaces;i++) {
2396         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2397       }
2398       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2399       n_ISForFaces = 0;
2400     }
2401 
2402 #if defined(PETSC_USE_DEBUG)
2403     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2404        Also use_change_of_basis should be consistent among processors */
2405     if (pcbddc->NullSpace) {
2406       PetscBool tbool[2],gbool[2];
2407 
2408       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2409         pcbddc->use_change_of_basis = PETSC_TRUE;
2410         if (!ISForEdges) {
2411           pcbddc->use_change_on_faces = PETSC_TRUE;
2412         }
2413       }
2414       tbool[0] = pcbddc->use_change_of_basis;
2415       tbool[1] = pcbddc->use_change_on_faces;
2416       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2417       pcbddc->use_change_of_basis = gbool[0];
2418       pcbddc->use_change_on_faces = gbool[1];
2419     }
2420 #endif
2421 
2422     /* check if near null space is attached to global mat */
2423     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2424     if (nearnullsp) {
2425       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2426       /* remove any stored info */
2427       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2428       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2429       /* store information for BDDC solver reuse */
2430       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2431       pcbddc->onearnullspace = nearnullsp;
2432       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2433       for (i=0;i<nnsp_size;i++) {
2434         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2435       }
2436     } else { /* if near null space is not provided BDDC uses constants by default */
2437       nnsp_size = 0;
2438       nnsp_has_cnst = PETSC_TRUE;
2439     }
2440     /* get max number of constraints on a single cc */
2441     max_constraints = nnsp_size;
2442     if (nnsp_has_cnst) max_constraints++;
2443 
2444     /*
2445          Evaluate maximum storage size needed by the procedure
2446          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2447          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2448          There can be multiple constraints per connected component
2449                                                                                                                                                            */
2450     n_vertices = 0;
2451     if (ISForVertices) {
2452       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2453     }
2454     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2455     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2456 
2457     total_counts = n_ISForFaces+n_ISForEdges;
2458     total_counts *= max_constraints;
2459     total_counts += n_vertices;
2460     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2461 
2462     total_counts = 0;
2463     max_size_of_constraint = 0;
2464     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2465       IS used_is;
2466       if (i<n_ISForEdges) {
2467         used_is = ISForEdges[i];
2468       } else {
2469         used_is = ISForFaces[i-n_ISForEdges];
2470       }
2471       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2472       total_counts += j;
2473       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2474     }
2475     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2476 
2477     /* get local part of global near null space vectors */
2478     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2479     for (k=0;k<nnsp_size;k++) {
2480       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2481       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2482       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2483     }
2484 
2485     /* whether or not to skip lapack calls */
2486     skip_lapack = PETSC_TRUE;
2487     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2488 
2489     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2490     if (!skip_lapack) {
2491       PetscScalar temp_work;
2492 
2493 #if defined(PETSC_MISSING_LAPACK_GESVD)
2494       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2495       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2496       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2497       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2498 #if defined(PETSC_USE_COMPLEX)
2499       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2500 #endif
2501       /* now we evaluate the optimal workspace using query with lwork=-1 */
2502       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2503       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2504       lwork = -1;
2505       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2506 #if !defined(PETSC_USE_COMPLEX)
2507       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2508 #else
2509       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2510 #endif
2511       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2512       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2513 #else /* on missing GESVD */
2514       /* SVD */
2515       PetscInt max_n,min_n;
2516       max_n = max_size_of_constraint;
2517       min_n = max_constraints;
2518       if (max_size_of_constraint < max_constraints) {
2519         min_n = max_size_of_constraint;
2520         max_n = max_constraints;
2521       }
2522       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2523 #if defined(PETSC_USE_COMPLEX)
2524       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2525 #endif
2526       /* now we evaluate the optimal workspace using query with lwork=-1 */
2527       lwork = -1;
2528       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2529       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2530       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2531       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2532 #if !defined(PETSC_USE_COMPLEX)
2533       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2534 #else
2535       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2536 #endif
2537       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2538       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2539 #endif /* on missing GESVD */
2540       /* Allocate optimal workspace */
2541       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2542       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2543     }
2544     /* Now we can loop on constraining sets */
2545     total_counts = 0;
2546     constraints_idxs_ptr[0] = 0;
2547     constraints_data_ptr[0] = 0;
2548     /* vertices */
2549     if (n_vertices) {
2550       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2551       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2552       for (i=0;i<n_vertices;i++) {
2553         constraints_n[total_counts] = 1;
2554         constraints_data[total_counts] = 1.0;
2555         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2556         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2557         total_counts++;
2558       }
2559       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2560       n_vertices = total_counts;
2561     }
2562 
2563     /* edges and faces */
2564     total_counts_cc = total_counts;
2565     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2566       IS        used_is;
2567       PetscBool idxs_copied = PETSC_FALSE;
2568 
2569       if (ncc<n_ISForEdges) {
2570         used_is = ISForEdges[ncc];
2571         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2572       } else {
2573         used_is = ISForFaces[ncc-n_ISForEdges];
2574         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2575       }
2576       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2577 
2578       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2579       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2580       /* change of basis should not be performed on local periodic nodes */
2581       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2582       if (nnsp_has_cnst) {
2583         PetscScalar quad_value;
2584 
2585         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2586         idxs_copied = PETSC_TRUE;
2587 
2588         if (!pcbddc->use_nnsp_true) {
2589           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2590         } else {
2591           quad_value = 1.0;
2592         }
2593         for (j=0;j<size_of_constraint;j++) {
2594           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2595         }
2596         temp_constraints++;
2597         total_counts++;
2598       }
2599       for (k=0;k<nnsp_size;k++) {
2600         PetscReal real_value;
2601         PetscScalar *ptr_to_data;
2602 
2603         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2604         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2605         for (j=0;j<size_of_constraint;j++) {
2606           ptr_to_data[j] = array[is_indices[j]];
2607         }
2608         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2609         /* check if array is null on the connected component */
2610         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2611         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2612         if (real_value > 0.0) { /* keep indices and values */
2613           temp_constraints++;
2614           total_counts++;
2615           if (!idxs_copied) {
2616             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2617             idxs_copied = PETSC_TRUE;
2618           }
2619         }
2620       }
2621       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2622       valid_constraints = temp_constraints;
2623       if (!pcbddc->use_nnsp_true && temp_constraints) {
2624         if (temp_constraints == 1) { /* just normalize the constraint */
2625           PetscScalar norm,*ptr_to_data;
2626 
2627           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2628           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2629           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2630           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2631           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2632         } else { /* perform SVD */
2633           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2634           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2635 
2636 #if defined(PETSC_MISSING_LAPACK_GESVD)
2637           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2638              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2639              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2640                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2641                 from that computed using LAPACKgesvd
2642              -> This is due to a different computation of eigenvectors in LAPACKheev
2643              -> The quality of the POD-computed basis will be the same */
2644           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2645           /* Store upper triangular part of correlation matrix */
2646           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2647           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2648           for (j=0;j<temp_constraints;j++) {
2649             for (k=0;k<j+1;k++) {
2650               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2651             }
2652           }
2653           /* compute eigenvalues and eigenvectors of correlation matrix */
2654           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2655           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2656 #if !defined(PETSC_USE_COMPLEX)
2657           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2658 #else
2659           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2660 #endif
2661           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2662           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2663           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2664           j = 0;
2665           while (j < temp_constraints && singular_vals[j] < tol) j++;
2666           total_counts = total_counts-j;
2667           valid_constraints = temp_constraints-j;
2668           /* scale and copy POD basis into used quadrature memory */
2669           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2670           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2671           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2672           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2673           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2674           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2675           if (j<temp_constraints) {
2676             PetscInt ii;
2677             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2678             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2679             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2680             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2681             for (k=0;k<temp_constraints-j;k++) {
2682               for (ii=0;ii<size_of_constraint;ii++) {
2683                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2684               }
2685             }
2686           }
2687 #else  /* on missing GESVD */
2688           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2689           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2690           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2691           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2692 #if !defined(PETSC_USE_COMPLEX)
2693           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2694 #else
2695           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2696 #endif
2697           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2698           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2699           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2700           k = temp_constraints;
2701           if (k > size_of_constraint) k = size_of_constraint;
2702           j = 0;
2703           while (j < k && singular_vals[k-j-1] < tol) j++;
2704           valid_constraints = k-j;
2705           total_counts = total_counts-temp_constraints+valid_constraints;
2706 #endif /* on missing GESVD */
2707         }
2708       }
2709       /* update pointers information */
2710       if (valid_constraints) {
2711         constraints_n[total_counts_cc] = valid_constraints;
2712         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2713         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2714         /* set change_of_basis flag */
2715         if (boolforchange) {
2716           PetscBTSet(change_basis,total_counts_cc);
2717         }
2718         total_counts_cc++;
2719       }
2720     }
2721     /* free workspace */
2722     if (!skip_lapack) {
2723       ierr = PetscFree(work);CHKERRQ(ierr);
2724 #if defined(PETSC_USE_COMPLEX)
2725       ierr = PetscFree(rwork);CHKERRQ(ierr);
2726 #endif
2727       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2728 #if defined(PETSC_MISSING_LAPACK_GESVD)
2729       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2730       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2731 #endif
2732     }
2733     for (k=0;k<nnsp_size;k++) {
2734       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2735     }
2736     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2737     /* free index sets of faces, edges and vertices */
2738     for (i=0;i<n_ISForFaces;i++) {
2739       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2740     }
2741     if (n_ISForFaces) {
2742       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2743     }
2744     for (i=0;i<n_ISForEdges;i++) {
2745       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2746     }
2747     if (n_ISForEdges) {
2748       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2749     }
2750     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2751   } else {
2752     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2753 
2754     total_counts = 0;
2755     n_vertices = 0;
2756     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2757       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2758     }
2759     max_constraints = 0;
2760     total_counts_cc = 0;
2761     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2762       total_counts += pcbddc->adaptive_constraints_n[i];
2763       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2764       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2765     }
2766     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2767     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2768     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2769     constraints_data = pcbddc->adaptive_constraints_data;
2770     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2771     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2772     total_counts_cc = 0;
2773     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2774       if (pcbddc->adaptive_constraints_n[i]) {
2775         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2776       }
2777     }
2778 #if 0
2779     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2780     for (i=0;i<total_counts_cc;i++) {
2781       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2782       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2783       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2784         printf(" %d",constraints_idxs[j]);
2785       }
2786       printf("\n");
2787       printf("number of cc: %d\n",constraints_n[i]);
2788     }
2789     for (i=0;i<n_vertices;i++) {
2790       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2791     }
2792     for (i=0;i<sub_schurs->n_subs;i++) {
2793       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2794     }
2795 #endif
2796 
2797     max_size_of_constraint = 0;
2798     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2799     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2800     /* Change of basis */
2801     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2802     if (pcbddc->use_change_of_basis) {
2803       for (i=0;i<sub_schurs->n_subs;i++) {
2804         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2805           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2806         }
2807       }
2808     }
2809   }
2810   pcbddc->local_primal_size = total_counts;
2811   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2812   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2813 
2814   /* map constraints_idxs in boundary numbering */
2815   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2816   if (i != constraints_idxs_ptr[total_counts_cc]) {
2817     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2818   }
2819 
2820   /* Create constraint matrix */
2821   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2822   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2823   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2824 
2825   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2826   /* determine if a QR strategy is needed for change of basis */
2827   qr_needed = PETSC_FALSE;
2828   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2829   total_primal_vertices=0;
2830   pcbddc->local_primal_size_cc = 0;
2831   for (i=0;i<total_counts_cc;i++) {
2832     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2833     if (size_of_constraint == 1) {
2834       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2835       pcbddc->local_primal_size_cc += 1;
2836     } else if (PetscBTLookup(change_basis,i)) {
2837       for (k=0;k<constraints_n[i];k++) {
2838         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2839       }
2840       pcbddc->local_primal_size_cc += constraints_n[i];
2841       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2842         PetscBTSet(qr_needed_idx,i);
2843         qr_needed = PETSC_TRUE;
2844       }
2845     } else {
2846       pcbddc->local_primal_size_cc += 1;
2847     }
2848   }
2849   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2850   pcbddc->n_vertices = total_primal_vertices;
2851   /* permute indices in order to have a sorted set of vertices */
2852   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2853 
2854   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2855   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2856   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2857   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2858 
2859   /* nonzero structure of constraint matrix */
2860   /* and get reference dof for local constraints */
2861   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2862   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2863 
2864   j = total_primal_vertices;
2865   total_counts = total_primal_vertices;
2866   cum = total_primal_vertices;
2867   for (i=n_vertices;i<total_counts_cc;i++) {
2868     if (!PetscBTLookup(change_basis,i)) {
2869       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2870       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2871       cum++;
2872       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2873       for (k=0;k<constraints_n[i];k++) {
2874         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2875         nnz[j+k] = size_of_constraint;
2876       }
2877       j += constraints_n[i];
2878     }
2879   }
2880   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2881   ierr = PetscFree(nnz);CHKERRQ(ierr);
2882 
2883   /* set values in constraint matrix */
2884   for (i=0;i<total_primal_vertices;i++) {
2885     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2886   }
2887   total_counts = total_primal_vertices;
2888   for (i=n_vertices;i<total_counts_cc;i++) {
2889     if (!PetscBTLookup(change_basis,i)) {
2890       PetscInt *cols;
2891 
2892       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2893       cols = constraints_idxs+constraints_idxs_ptr[i];
2894       for (k=0;k<constraints_n[i];k++) {
2895         PetscInt    row = total_counts+k;
2896         PetscScalar *vals;
2897 
2898         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2899         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2900       }
2901       total_counts += constraints_n[i];
2902     }
2903   }
2904   /* assembling */
2905   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907 
2908   /*
2909   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2910   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2911   */
2912   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2913   if (pcbddc->use_change_of_basis) {
2914     /* dual and primal dofs on a single cc */
2915     PetscInt     dual_dofs,primal_dofs;
2916     /* working stuff for GEQRF */
2917     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2918     PetscBLASInt lqr_work;
2919     /* working stuff for UNGQR */
2920     PetscScalar  *gqr_work,lgqr_work_t;
2921     PetscBLASInt lgqr_work;
2922     /* working stuff for TRTRS */
2923     PetscScalar  *trs_rhs;
2924     PetscBLASInt Blas_NRHS;
2925     /* pointers for values insertion into change of basis matrix */
2926     PetscInt     *start_rows,*start_cols;
2927     PetscScalar  *start_vals;
2928     /* working stuff for values insertion */
2929     PetscBT      is_primal;
2930     PetscInt     *aux_primal_numbering_B;
2931     /* matrix sizes */
2932     PetscInt     global_size,local_size;
2933     /* temporary change of basis */
2934     Mat          localChangeOfBasisMatrix;
2935     /* extra space for debugging */
2936     PetscScalar  *dbg_work;
2937 
2938     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2939     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2940     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2941     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2942     /* nonzeros for local mat */
2943     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2944     for (i=0;i<pcis->n;i++) nnz[i]=1;
2945     for (i=n_vertices;i<total_counts_cc;i++) {
2946       if (PetscBTLookup(change_basis,i)) {
2947         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2948         if (PetscBTLookup(qr_needed_idx,i)) {
2949           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2950         } else {
2951           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2952           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2953         }
2954       }
2955     }
2956     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2957     ierr = PetscFree(nnz);CHKERRQ(ierr);
2958     /* Set initial identity in the matrix */
2959     for (i=0;i<pcis->n;i++) {
2960       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2961     }
2962 
2963     if (pcbddc->dbg_flag) {
2964       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2965       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2966     }
2967 
2968 
2969     /* Now we loop on the constraints which need a change of basis */
2970     /*
2971        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2972        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2973 
2974        Basic blocks of change of basis matrix T computed by
2975 
2976           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2977 
2978             | 1        0   ...        0         s_1/S |
2979             | 0        1   ...        0         s_2/S |
2980             |              ...                        |
2981             | 0        ...            1     s_{n-1}/S |
2982             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2983 
2984             with S = \sum_{i=1}^n s_i^2
2985             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2986                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2987 
2988           - QR decomposition of constraints otherwise
2989     */
2990     if (qr_needed) {
2991       /* space to store Q */
2992       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2993       /* first we issue queries for optimal work */
2994       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2995       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2996       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2997       lqr_work = -1;
2998       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2999       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3000       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3001       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3002       lgqr_work = -1;
3003       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3004       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3005       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3006       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3007       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3008       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3009       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3010       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3011       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3012       /* array to store scaling factors for reflectors */
3013       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3014       /* array to store rhs and solution of triangular solver */
3015       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3016       /* allocating workspace for check */
3017       if (pcbddc->dbg_flag) {
3018         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3019       }
3020     }
3021     /* array to store whether a node is primal or not */
3022     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3023     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3024     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3025     if (i != total_primal_vertices) {
3026       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3027     }
3028     for (i=0;i<total_primal_vertices;i++) {
3029       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3030     }
3031     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3032 
3033     /* loop on constraints and see whether or not they need a change of basis and compute it */
3034     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3035       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3036       if (PetscBTLookup(change_basis,total_counts)) {
3037         /* get constraint info */
3038         primal_dofs = constraints_n[total_counts];
3039         dual_dofs = size_of_constraint-primal_dofs;
3040 
3041         if (pcbddc->dbg_flag) {
3042           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3043         }
3044 
3045         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3046 
3047           /* copy quadrature constraints for change of basis check */
3048           if (pcbddc->dbg_flag) {
3049             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3050           }
3051           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3052           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3053 
3054           /* compute QR decomposition of constraints */
3055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3056           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3057           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3058           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3059           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3060           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3061           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3062 
3063           /* explictly compute R^-T */
3064           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3065           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3066           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3067           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3068           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3069           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3070           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3071           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3072           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3073           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3074 
3075           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3076           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3077           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3078           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3079           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3080           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3081           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3082           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3083           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3084 
3085           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3086              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3087              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3088           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3089           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3090           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3091           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3092           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3093           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3094           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3095           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3096           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3097           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3098 
3099           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3100           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3101           /* insert cols for primal dofs */
3102           for (j=0;j<primal_dofs;j++) {
3103             start_vals = &qr_basis[j*size_of_constraint];
3104             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3105             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3106           }
3107           /* insert cols for dual dofs */
3108           for (j=0,k=0;j<dual_dofs;k++) {
3109             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3110               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3111               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3112               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3113               j++;
3114             }
3115           }
3116 
3117           /* check change of basis */
3118           if (pcbddc->dbg_flag) {
3119             PetscInt   ii,jj;
3120             PetscBool valid_qr=PETSC_TRUE;
3121             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3122             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3123             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3124             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3125             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3126             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3127             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3128             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3129             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3130             for (jj=0;jj<size_of_constraint;jj++) {
3131               for (ii=0;ii<primal_dofs;ii++) {
3132                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3133                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3134               }
3135             }
3136             if (!valid_qr) {
3137               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3138               for (jj=0;jj<size_of_constraint;jj++) {
3139                 for (ii=0;ii<primal_dofs;ii++) {
3140                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3141                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3142                   }
3143                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3144                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3145                   }
3146                 }
3147               }
3148             } else {
3149               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3150             }
3151           }
3152         } else { /* simple transformation block */
3153           PetscInt    row,col;
3154           PetscScalar val,norm;
3155 
3156           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3157           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3158           for (j=0;j<size_of_constraint;j++) {
3159             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3160             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3161             if (!PetscBTLookup(is_primal,row_B)) {
3162               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3163               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3164               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3165             } else {
3166               for (k=0;k<size_of_constraint;k++) {
3167                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3168                 if (row != col) {
3169                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3170                 } else {
3171                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3172                 }
3173                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3174               }
3175             }
3176           }
3177           if (pcbddc->dbg_flag) {
3178             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3179           }
3180         }
3181       } else {
3182         if (pcbddc->dbg_flag) {
3183           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3184         }
3185       }
3186     }
3187 
3188     /* free workspace */
3189     if (qr_needed) {
3190       if (pcbddc->dbg_flag) {
3191         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3192       }
3193       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3194       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3195       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3196       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3197       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3198     }
3199     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3200     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3201     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202 
3203     /* assembling of global change of variable */
3204     {
3205       Mat      tmat;
3206       PetscInt bs;
3207 
3208       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3209       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3210       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3211       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3212       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3213       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3214       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3215       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3216       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3217       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3218       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3219       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3220       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3221       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3222       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3223       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3224       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3225       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3226     }
3227     /* check */
3228     if (pcbddc->dbg_flag) {
3229       PetscReal error;
3230       Vec       x,x_change;
3231 
3232       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3233       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3234       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3235       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3236       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3237       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3238       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3239       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3240       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3241       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3242       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3243       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3244       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3245       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3246       ierr = VecDestroy(&x);CHKERRQ(ierr);
3247       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3248     }
3249 
3250     /* adapt sub_schurs computed (if any) */
3251     if (pcbddc->use_deluxe_scaling) {
3252       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3253       if (sub_schurs->S_Ej_all) {
3254         Mat                    S_new,tmat;
3255         ISLocalToGlobalMapping NtoSall;
3256         IS                     is_all_N,is_V,is_V_Sall;
3257         const PetscScalar      *array;
3258         const PetscInt         *idxs_V,*idxs_all;
3259         PetscInt               i,n_V;
3260 
3261         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3262         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3263         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3264         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3265         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3266         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3267         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3268         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3269         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3270         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3271         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3272         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3273         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3274         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3275         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3276         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3277         for (i=0;i<n_V;i++) {
3278           PetscScalar val;
3279           PetscInt    idx;
3280 
3281           idx = idxs_V[i];
3282           val = array[idxs_all[idxs_V[i]]];
3283           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3284         }
3285         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287         sub_schurs->S_Ej_all = S_new;
3288         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3289         if (sub_schurs->sum_S_Ej_all) {
3290           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3291           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3292           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3293           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3294           sub_schurs->sum_S_Ej_all = S_new;
3295           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3296         }
3297         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3298         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3299         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3300         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3301         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3302       }
3303     }
3304     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3305   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3306     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3307     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3308   }
3309 
3310   /* set up change of basis context */
3311   if (pcbddc->ChangeOfBasisMatrix) {
3312     PCBDDCChange_ctx change_ctx;
3313 
3314     if (!pcbddc->new_global_mat) {
3315       PetscInt global_size,local_size;
3316 
3317       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3318       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3319       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3320       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3321       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3322       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3323       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3324       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3325       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3326     } else {
3327       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3328       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3329       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3330     }
3331     if (!pcbddc->user_ChangeOfBasisMatrix) {
3332       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3333       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3334     } else {
3335       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3336       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3337     }
3338     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3339     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3340     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3341     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3342   }
3343 
3344   /* add pressure dof to set of primal nodes for numbering purposes */
3345   if (pcbddc->benign_p0_lidx >= 0) {
3346     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx;
3347     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx;
3348     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3349     pcbddc->local_primal_size_cc++;
3350     pcbddc->local_primal_size++;
3351   }
3352 
3353   /* check if a new primal space has been introduced (also take into account benign trick) */
3354   pcbddc->new_primal_space_local = PETSC_TRUE;
3355   if (olocal_primal_size == pcbddc->local_primal_size) {
3356     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3357     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3358     if (!pcbddc->new_primal_space_local) {
3359       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3360       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3361     }
3362   }
3363   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3364   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3365   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3366 
3367   /* flush dbg viewer */
3368   if (pcbddc->dbg_flag) {
3369     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3370   }
3371 
3372   /* free workspace */
3373   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3374   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3375   if (!pcbddc->adaptive_selection) {
3376     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3377     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3378   } else {
3379     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3380                       pcbddc->adaptive_constraints_idxs_ptr,
3381                       pcbddc->adaptive_constraints_data_ptr,
3382                       pcbddc->adaptive_constraints_idxs,
3383                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3384     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3385     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3386   }
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 #undef __FUNCT__
3391 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3392 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3393 {
3394   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3395   PC_IS       *pcis = (PC_IS*)pc->data;
3396   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3397   PetscInt    ierr,i,vertex_size,N;
3398   PetscViewer viewer=pcbddc->dbg_viewer;
3399 
3400   PetscFunctionBegin;
3401   /* Reset previously computed graph */
3402   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3403   /* Init local Graph struct */
3404   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3405   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3406 
3407   /* Check validity of the csr graph passed in by the user */
3408   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3409     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3410   }
3411 
3412   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3413   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3414     PetscInt  *xadj,*adjncy;
3415     PetscInt  nvtxs;
3416     PetscBool flg_row=PETSC_FALSE;
3417 
3418     if (pcbddc->use_local_adj) {
3419 
3420       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3421       if (flg_row) {
3422         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3423         pcbddc->computed_rowadj = PETSC_TRUE;
3424       }
3425       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3426     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3427       IS                     is_dummy;
3428       ISLocalToGlobalMapping l2gmap_dummy;
3429       PetscInt               j,sum;
3430       PetscInt               *cxadj,*cadjncy;
3431       const PetscInt         *idxs;
3432       PCBDDCGraph            graph;
3433       PetscBT                is_on_boundary;
3434 
3435       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3436       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3437       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3438       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3439       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3440       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3441       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3442       if (flg_row) {
3443         graph->xadj = xadj;
3444         graph->adjncy = adjncy;
3445       }
3446       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3447       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3448       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3449 
3450       if (pcbddc->dbg_flag) {
3451         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3452         for (i=0;i<graph->ncc;i++) {
3453           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3454         }
3455       }
3456 
3457       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3458       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3459       for (i=0;i<pcis->n_B;i++) {
3460         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3461       }
3462       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3463 
3464       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3465       sum = 0;
3466       for (i=0;i<graph->ncc;i++) {
3467         PetscInt sizecc = 0;
3468         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3469           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3470             sizecc++;
3471           }
3472         }
3473         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3474           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3475             cxadj[graph->queue[j]] = sizecc;
3476           }
3477         }
3478         sum += sizecc*sizecc;
3479       }
3480       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3481       sum = 0;
3482       for (i=0;i<pcis->n;i++) {
3483         PetscInt temp = cxadj[i];
3484         cxadj[i] = sum;
3485         sum += temp;
3486       }
3487       cxadj[pcis->n] = sum;
3488       for (i=0;i<graph->ncc;i++) {
3489         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3490           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3491             PetscInt k,sizecc = 0;
3492             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3493               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3494                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3495                 sizecc++;
3496               }
3497             }
3498           }
3499         }
3500       }
3501       if (sum) {
3502         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3503       } else {
3504         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3505         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3506       }
3507       graph->xadj = 0;
3508       graph->adjncy = 0;
3509       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3510       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3511     }
3512   }
3513   if (pcbddc->dbg_flag) {
3514     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3515   }
3516 
3517   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3518   vertex_size = 1;
3519   if (pcbddc->user_provided_isfordofs) {
3520     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3521       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3522       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3523         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3524         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3525       }
3526       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3527       pcbddc->n_ISForDofs = 0;
3528       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3529     }
3530     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3531     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3532   } else {
3533     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3534       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3535       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3536       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3537         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3538       }
3539     }
3540   }
3541 
3542   /* Setup of Graph */
3543   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3544     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3545   }
3546   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3547     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3548   }
3549   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3550 
3551   /* Graph's connected components analysis */
3552   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3553 
3554   /* print some info to stdout */
3555   if (pcbddc->dbg_flag) {
3556     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3557   }
3558 
3559   /* mark topography has done */
3560   pcbddc->recompute_topography = PETSC_FALSE;
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3565 #undef __FUNCT__
3566 #define __FUNCT__ "PCBDDCSubsetNumbering"
3567 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3568 {
3569   PetscSF        sf;
3570   PetscLayout    map;
3571   const PetscInt *idxs;
3572   PetscInt       *leaf_data,*root_data,*gidxs;
3573   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3574   PetscInt       n_n,nlocals,start,first_index;
3575   PetscMPIInt    commsize;
3576   PetscBool      first_found;
3577   PetscErrorCode ierr;
3578 
3579   PetscFunctionBegin;
3580   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3581   if (subset_mult) {
3582     PetscCheckSameComm(subset,1,subset_mult,2);
3583     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3584     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3585   }
3586   /* create workspace layout for computing global indices of subset */
3587   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3588   lbounds[0] = lbounds[1] = 0;
3589   for (i=0;i<n;i++) {
3590     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3591     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3592   }
3593   lbounds[0] = -lbounds[0];
3594   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3595   gbounds[0] = -gbounds[0];
3596   N = gbounds[1] - gbounds[0] + 1;
3597   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3598   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3599   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3600   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3601   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3602 
3603   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3604   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3605   if (subset_mult) {
3606     const PetscInt* idxs_mult;
3607 
3608     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3609     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3610     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3611   } else {
3612     for (i=0;i<n;i++) leaf_data[i] = 1;
3613   }
3614   /* local size of new subset */
3615   n_n = 0;
3616   for (i=0;i<n;i++) n_n += leaf_data[i];
3617 
3618   /* global indexes in layout */
3619   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3620   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3621   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3622   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3623   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3624   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3625 
3626   /* reduce from leaves to roots */
3627   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3628   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3629   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3630 
3631   /* count indexes in local part of layout */
3632   nlocals = 0;
3633   first_index = -1;
3634   first_found = PETSC_FALSE;
3635   for (i=0;i<Nl;i++) {
3636     if (!first_found && root_data[i]) {
3637       first_found = PETSC_TRUE;
3638       first_index = i;
3639     }
3640     nlocals += root_data[i];
3641   }
3642 
3643   /* cumulative of number of indexes and size of subset without holes */
3644 #if defined(PETSC_HAVE_MPI_EXSCAN)
3645   start = 0;
3646   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3647 #else
3648   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3649   start = start-nlocals;
3650 #endif
3651 
3652   if (N_n) { /* compute total size of new subset if requested */
3653     *N_n = start + nlocals;
3654     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3655     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3656   }
3657 
3658   /* adapt root data with cumulative */
3659   if (first_found) {
3660     PetscInt old_index;
3661 
3662     root_data[first_index] += start;
3663     old_index = first_index;
3664     for (i=first_index+1;i<Nl;i++) {
3665       if (root_data[i]) {
3666         root_data[i] += root_data[old_index];
3667         old_index = i;
3668       }
3669     }
3670   }
3671 
3672   /* from roots to leaves */
3673   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3674   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3675   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3676 
3677   /* create new IS with global indexes without holes */
3678   if (subset_mult) {
3679     const PetscInt* idxs_mult;
3680     PetscInt        cum;
3681 
3682     cum = 0;
3683     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3684     for (i=0;i<n;i++) {
3685       PetscInt j;
3686       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3687     }
3688     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3689   } else {
3690     for (i=0;i<n;i++) {
3691       gidxs[i] = leaf_data[i]-1;
3692     }
3693   }
3694   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3695   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 #undef __FUNCT__
3700 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3701 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3702 {
3703   PetscInt       i,j;
3704   PetscScalar    *alphas;
3705   PetscErrorCode ierr;
3706 
3707   PetscFunctionBegin;
3708   /* this implements stabilized Gram-Schmidt */
3709   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3710   for (i=0;i<n;i++) {
3711     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3712     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3713     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3714   }
3715   ierr = PetscFree(alphas);CHKERRQ(ierr);
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 #undef __FUNCT__
3720 #define __FUNCT__ "MatISGetSubassemblingPattern"
3721 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3722 {
3723   IS             ranks_send_to;
3724   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3725   PetscMPIInt    size,rank,color;
3726   PetscInt       *xadj,*adjncy;
3727   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3728   PetscInt       i,local_size,threshold=0;
3729   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3730   PetscSubcomm   subcomm;
3731   PetscErrorCode ierr;
3732 
3733   PetscFunctionBegin;
3734   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3735   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3736   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3737 
3738   /* Get info on mapping */
3739   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3740   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3741 
3742   /* build local CSR graph of subdomains' connectivity */
3743   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3744   xadj[0] = 0;
3745   xadj[1] = PetscMax(n_neighs-1,0);
3746   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3747   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3748 
3749   if (threshold) {
3750     PetscInt xadj_count = 0;
3751     for (i=1;i<n_neighs;i++) {
3752       if (n_shared[i] > threshold) {
3753         adjncy[xadj_count] = neighs[i];
3754         adjncy_wgt[xadj_count] = n_shared[i];
3755         xadj_count++;
3756       }
3757     }
3758     xadj[1] = xadj_count;
3759   } else {
3760     if (xadj[1]) {
3761       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3762       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3763     }
3764   }
3765   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3766   if (use_square) {
3767     for (i=0;i<xadj[1];i++) {
3768       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3769     }
3770   }
3771   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3772 
3773   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3774 
3775   /*
3776     Restrict work on active processes only.
3777   */
3778   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3779   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3780   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3781   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3782   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3783   if (color) {
3784     ierr = PetscFree(xadj);CHKERRQ(ierr);
3785     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3786     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3787   } else {
3788     Mat             subdomain_adj;
3789     IS              new_ranks,new_ranks_contig;
3790     MatPartitioning partitioner;
3791     PetscInt        prank,rstart=0,rend=0;
3792     PetscInt        *is_indices,*oldranks;
3793     PetscBool       aggregate;
3794 
3795     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3796     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3797     prank = rank;
3798     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3799     /*
3800     for (i=0;i<size;i++) {
3801       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3802     }
3803     */
3804     for (i=0;i<xadj[1];i++) {
3805       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3806     }
3807     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3808     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3809     if (aggregate) {
3810       PetscInt    lrows,row,ncols,*cols;
3811       PetscMPIInt nrank;
3812       PetscScalar *vals;
3813 
3814       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3815       lrows = 0;
3816       if (nrank<redprocs) {
3817         lrows = size/redprocs;
3818         if (nrank<size%redprocs) lrows++;
3819       }
3820       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3821       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3822       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3823       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3824       row = nrank;
3825       ncols = xadj[1]-xadj[0];
3826       cols = adjncy;
3827       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3828       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3829       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3830       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832       ierr = PetscFree(xadj);CHKERRQ(ierr);
3833       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3834       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3835       ierr = PetscFree(vals);CHKERRQ(ierr);
3836     } else {
3837       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3838     }
3839     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3840 
3841     /* Partition */
3842     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3843     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3844     if (use_vwgt) {
3845       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3846       v_wgt[0] = local_size;
3847       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3848     }
3849     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3850     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3851     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3852     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3853     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3854 
3855     /* renumber new_ranks to avoid "holes" in new set of processors */
3856     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3857     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3858     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3859     if (!redprocs) {
3860       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3861     } else {
3862       PetscInt    idxs[1];
3863       PetscMPIInt tag;
3864       MPI_Request *reqs;
3865 
3866       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3867       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3868       for (i=rstart;i<rend;i++) {
3869         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3870       }
3871       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3872       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3873       ierr = PetscFree(reqs);CHKERRQ(ierr);
3874       ranks_send_to_idx[0] = oldranks[idxs[0]];
3875     }
3876     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3877     /* clean up */
3878     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3879     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3880     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3881     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3882   }
3883   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3884 
3885   /* assemble parallel IS for sends */
3886   i = 1;
3887   if (color) i=0;
3888   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3889   /* get back IS */
3890   *is_sends = ranks_send_to;
3891   PetscFunctionReturn(0);
3892 }
3893 
3894 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatISSubassemble"
3898 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3899 {
3900   Mat                    local_mat;
3901   IS                     is_sends_internal;
3902   PetscInt               rows,cols,new_local_rows;
3903   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3904   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3905   ISLocalToGlobalMapping l2gmap;
3906   PetscInt*              l2gmap_indices;
3907   const PetscInt*        is_indices;
3908   MatType                new_local_type;
3909   /* buffers */
3910   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3911   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3912   PetscInt               *recv_buffer_idxs_local;
3913   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3914   /* MPI */
3915   MPI_Comm               comm,comm_n;
3916   PetscSubcomm           subcomm;
3917   PetscMPIInt            n_sends,n_recvs,commsize;
3918   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3919   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3920   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3921   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3922   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3923   PetscErrorCode         ierr;
3924 
3925   PetscFunctionBegin;
3926   /* TODO: add missing checks */
3927   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3928   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3929   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3930   PetscValidLogicalCollectiveInt(mat,nis,7);
3931   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3932   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3933   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3934   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3935   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3936   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3937   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3938   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3939     PetscInt mrows,mcols,mnrows,mncols;
3940     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3941     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3942     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3943     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3944     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3945     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3946   }
3947   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3948   PetscValidLogicalCollectiveInt(mat,bs,0);
3949   /* prepare IS for sending if not provided */
3950   if (!is_sends) {
3951     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3952     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3953   } else {
3954     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3955     is_sends_internal = is_sends;
3956   }
3957 
3958   /* get comm */
3959   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3960 
3961   /* compute number of sends */
3962   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3963   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3964 
3965   /* compute number of receives */
3966   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3967   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3968   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3969   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3970   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3971   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3972   ierr = PetscFree(iflags);CHKERRQ(ierr);
3973 
3974   /* restrict comm if requested */
3975   subcomm = 0;
3976   destroy_mat = PETSC_FALSE;
3977   if (restrict_comm) {
3978     PetscMPIInt color,subcommsize;
3979 
3980     color = 0;
3981     if (restrict_full) {
3982       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3983     } else {
3984       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3985     }
3986     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3987     subcommsize = commsize - subcommsize;
3988     /* check if reuse has been requested */
3989     if (reuse == MAT_REUSE_MATRIX) {
3990       if (*mat_n) {
3991         PetscMPIInt subcommsize2;
3992         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3993         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3994         comm_n = PetscObjectComm((PetscObject)*mat_n);
3995       } else {
3996         comm_n = PETSC_COMM_SELF;
3997       }
3998     } else { /* MAT_INITIAL_MATRIX */
3999       PetscMPIInt rank;
4000 
4001       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4002       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4003       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4004       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4005       comm_n = PetscSubcommChild(subcomm);
4006     }
4007     /* flag to destroy *mat_n if not significative */
4008     if (color) destroy_mat = PETSC_TRUE;
4009   } else {
4010     comm_n = comm;
4011   }
4012 
4013   /* prepare send/receive buffers */
4014   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4015   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4016   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4017   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4018   if (nis) {
4019     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4020   }
4021 
4022   /* Get data from local matrices */
4023   if (!isdense) {
4024     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4025     /* TODO: See below some guidelines on how to prepare the local buffers */
4026     /*
4027        send_buffer_vals should contain the raw values of the local matrix
4028        send_buffer_idxs should contain:
4029        - MatType_PRIVATE type
4030        - PetscInt        size_of_l2gmap
4031        - PetscInt        global_row_indices[size_of_l2gmap]
4032        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4033     */
4034   } else {
4035     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4036     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4037     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4038     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4039     send_buffer_idxs[1] = i;
4040     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4041     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4042     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4043     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4044     for (i=0;i<n_sends;i++) {
4045       ilengths_vals[is_indices[i]] = len*len;
4046       ilengths_idxs[is_indices[i]] = len+2;
4047     }
4048   }
4049   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4050   /* additional is (if any) */
4051   if (nis) {
4052     PetscMPIInt psum;
4053     PetscInt j;
4054     for (j=0,psum=0;j<nis;j++) {
4055       PetscInt plen;
4056       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4057       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4058       psum += len+1; /* indices + lenght */
4059     }
4060     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4061     for (j=0,psum=0;j<nis;j++) {
4062       PetscInt plen;
4063       const PetscInt *is_array_idxs;
4064       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4065       send_buffer_idxs_is[psum] = plen;
4066       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4067       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4068       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4069       psum += plen+1; /* indices + lenght */
4070     }
4071     for (i=0;i<n_sends;i++) {
4072       ilengths_idxs_is[is_indices[i]] = psum;
4073     }
4074     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4075   }
4076 
4077   buf_size_idxs = 0;
4078   buf_size_vals = 0;
4079   buf_size_idxs_is = 0;
4080   for (i=0;i<n_recvs;i++) {
4081     buf_size_idxs += (PetscInt)olengths_idxs[i];
4082     buf_size_vals += (PetscInt)olengths_vals[i];
4083     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4084   }
4085   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4086   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4087   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4088 
4089   /* get new tags for clean communications */
4090   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4091   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4092   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4093 
4094   /* allocate for requests */
4095   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4096   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4097   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4098   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4099   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4100   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4101 
4102   /* communications */
4103   ptr_idxs = recv_buffer_idxs;
4104   ptr_vals = recv_buffer_vals;
4105   ptr_idxs_is = recv_buffer_idxs_is;
4106   for (i=0;i<n_recvs;i++) {
4107     source_dest = onodes[i];
4108     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4109     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4110     ptr_idxs += olengths_idxs[i];
4111     ptr_vals += olengths_vals[i];
4112     if (nis) {
4113       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4114       ptr_idxs_is += olengths_idxs_is[i];
4115     }
4116   }
4117   for (i=0;i<n_sends;i++) {
4118     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4119     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4120     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4121     if (nis) {
4122       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4123     }
4124   }
4125   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4126   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4127 
4128   /* assemble new l2g map */
4129   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4130   ptr_idxs = recv_buffer_idxs;
4131   new_local_rows = 0;
4132   for (i=0;i<n_recvs;i++) {
4133     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4134     ptr_idxs += olengths_idxs[i];
4135   }
4136   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4137   ptr_idxs = recv_buffer_idxs;
4138   new_local_rows = 0;
4139   for (i=0;i<n_recvs;i++) {
4140     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4141     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4142     ptr_idxs += olengths_idxs[i];
4143   }
4144   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4145   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4146   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4147 
4148   /* infer new local matrix type from received local matrices type */
4149   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4150   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4151   if (n_recvs) {
4152     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4153     ptr_idxs = recv_buffer_idxs;
4154     for (i=0;i<n_recvs;i++) {
4155       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4156         new_local_type_private = MATAIJ_PRIVATE;
4157         break;
4158       }
4159       ptr_idxs += olengths_idxs[i];
4160     }
4161     switch (new_local_type_private) {
4162       case MATDENSE_PRIVATE:
4163         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4164           new_local_type = MATSEQAIJ;
4165           bs = 1;
4166         } else { /* if I receive only 1 dense matrix */
4167           new_local_type = MATSEQDENSE;
4168           bs = 1;
4169         }
4170         break;
4171       case MATAIJ_PRIVATE:
4172         new_local_type = MATSEQAIJ;
4173         bs = 1;
4174         break;
4175       case MATBAIJ_PRIVATE:
4176         new_local_type = MATSEQBAIJ;
4177         break;
4178       case MATSBAIJ_PRIVATE:
4179         new_local_type = MATSEQSBAIJ;
4180         break;
4181       default:
4182         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4183         break;
4184     }
4185   } else { /* by default, new_local_type is seqdense */
4186     new_local_type = MATSEQDENSE;
4187     bs = 1;
4188   }
4189 
4190   /* create MATIS object if needed */
4191   if (reuse == MAT_INITIAL_MATRIX) {
4192     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4193     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4194   } else {
4195     /* it also destroys the local matrices */
4196     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4197   }
4198   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4199   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4200 
4201   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4202 
4203   /* Global to local map of received indices */
4204   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4205   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4206   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4207 
4208   /* restore attributes -> type of incoming data and its size */
4209   buf_size_idxs = 0;
4210   for (i=0;i<n_recvs;i++) {
4211     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4212     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4213     buf_size_idxs += (PetscInt)olengths_idxs[i];
4214   }
4215   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4216 
4217   /* set preallocation */
4218   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4219   if (!newisdense) {
4220     PetscInt *new_local_nnz=0;
4221 
4222     ptr_vals = recv_buffer_vals;
4223     ptr_idxs = recv_buffer_idxs_local;
4224     if (n_recvs) {
4225       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4226     }
4227     for (i=0;i<n_recvs;i++) {
4228       PetscInt j;
4229       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4230         for (j=0;j<*(ptr_idxs+1);j++) {
4231           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4232         }
4233       } else {
4234         /* TODO */
4235       }
4236       ptr_idxs += olengths_idxs[i];
4237     }
4238     if (new_local_nnz) {
4239       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4240       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4241       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4242       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4243       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4244       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4245     } else {
4246       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4247     }
4248     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4249   } else {
4250     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4251   }
4252 
4253   /* set values */
4254   ptr_vals = recv_buffer_vals;
4255   ptr_idxs = recv_buffer_idxs_local;
4256   for (i=0;i<n_recvs;i++) {
4257     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4258       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4259       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4260       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4261       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4262       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4263     } else {
4264       /* TODO */
4265     }
4266     ptr_idxs += olengths_idxs[i];
4267     ptr_vals += olengths_vals[i];
4268   }
4269   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4270   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4271   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4272   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4273   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4274   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4275 
4276 #if 0
4277   if (!restrict_comm) { /* check */
4278     Vec       lvec,rvec;
4279     PetscReal infty_error;
4280 
4281     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4282     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4283     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4284     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4285     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4286     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4287     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4288     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4289     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4290   }
4291 #endif
4292 
4293   /* assemble new additional is (if any) */
4294   if (nis) {
4295     PetscInt **temp_idxs,*count_is,j,psum;
4296 
4297     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4298     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4299     ptr_idxs = recv_buffer_idxs_is;
4300     psum = 0;
4301     for (i=0;i<n_recvs;i++) {
4302       for (j=0;j<nis;j++) {
4303         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4304         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4305         psum += plen;
4306         ptr_idxs += plen+1; /* shift pointer to received data */
4307       }
4308     }
4309     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4310     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4311     for (i=1;i<nis;i++) {
4312       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4313     }
4314     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4315     ptr_idxs = recv_buffer_idxs_is;
4316     for (i=0;i<n_recvs;i++) {
4317       for (j=0;j<nis;j++) {
4318         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4319         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4320         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4321         ptr_idxs += plen+1; /* shift pointer to received data */
4322       }
4323     }
4324     for (i=0;i<nis;i++) {
4325       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4326       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4327       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4328     }
4329     ierr = PetscFree(count_is);CHKERRQ(ierr);
4330     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4331     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4332   }
4333   /* free workspace */
4334   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4335   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4336   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4337   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4338   if (isdense) {
4339     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4340     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4341   } else {
4342     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4343   }
4344   if (nis) {
4345     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4346     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4347   }
4348   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4349   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4350   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4351   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4352   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4353   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4354   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4355   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4356   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4357   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4358   ierr = PetscFree(onodes);CHKERRQ(ierr);
4359   if (nis) {
4360     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4361     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4362     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4363   }
4364   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4365   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4366     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4367     for (i=0;i<nis;i++) {
4368       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4369     }
4370     *mat_n = NULL;
4371   }
4372   PetscFunctionReturn(0);
4373 }
4374 
4375 /* temporary hack into ksp private data structure */
4376 #include <petsc/private/kspimpl.h>
4377 
4378 #undef __FUNCT__
4379 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4380 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4381 {
4382   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4383   PC_IS                  *pcis = (PC_IS*)pc->data;
4384   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4385   MatNullSpace           CoarseNullSpace=NULL;
4386   ISLocalToGlobalMapping coarse_islg;
4387   IS                     coarse_is,*isarray;
4388   PetscInt               i,im_active=-1,active_procs=-1;
4389   PetscInt               nis,nisdofs,nisneu;
4390   PC                     pc_temp;
4391   PCType                 coarse_pc_type;
4392   KSPType                coarse_ksp_type;
4393   PetscBool              multilevel_requested,multilevel_allowed;
4394   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4395   Mat                    t_coarse_mat_is;
4396   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4397   PetscMPIInt            all_procs;
4398   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4399   PetscBool              compute_vecs = PETSC_FALSE;
4400   PetscScalar            *array;
4401   PetscErrorCode         ierr;
4402 
4403   PetscFunctionBegin;
4404   /* Assign global numbering to coarse dofs */
4405   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4406     PetscInt ocoarse_size;
4407     compute_vecs = PETSC_TRUE;
4408     ocoarse_size = pcbddc->coarse_size;
4409     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4410     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4411     /* see if we can avoid some work */
4412     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4413       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4414       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4415         PC        pc;
4416         PetscBool isbddc;
4417 
4418         /* temporary workaround since PCBDDC does not have a reset method so far */
4419         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4420         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4421         if (isbddc) {
4422           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4423         }
4424         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4425         coarse_reuse = PETSC_FALSE;
4426       } else { /* we can safely reuse already computed coarse matrix */
4427         coarse_reuse = PETSC_TRUE;
4428       }
4429     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4430       coarse_reuse = PETSC_FALSE;
4431     }
4432     /* reset any subassembling information */
4433     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4434     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4435   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4436     coarse_reuse = PETSC_TRUE;
4437   }
4438 
4439   /* count "active" (i.e. with positive local size) and "void" processes */
4440   im_active = !!(pcis->n);
4441   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4442   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4443   void_procs = all_procs-active_procs;
4444   csin_type_simple = PETSC_TRUE;
4445   redist = PETSC_FALSE;
4446   if (pcbddc->current_level && void_procs) {
4447     csin_ml = PETSC_TRUE;
4448     ncoarse_ml = void_procs;
4449     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4450     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4451       csin_ds = PETSC_TRUE;
4452       ncoarse_ds = pcbddc->redistribute_coarse;
4453       redist = PETSC_TRUE;
4454     } else {
4455       csin_ds = PETSC_TRUE;
4456       ncoarse_ds = active_procs;
4457       redist = PETSC_TRUE;
4458     }
4459   } else {
4460     csin_ml = PETSC_FALSE;
4461     ncoarse_ml = all_procs;
4462     if (void_procs) {
4463       csin_ds = PETSC_TRUE;
4464       ncoarse_ds = void_procs;
4465       csin_type_simple = PETSC_FALSE;
4466     } else {
4467       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4468         csin_ds = PETSC_TRUE;
4469         ncoarse_ds = pcbddc->redistribute_coarse;
4470         redist = PETSC_TRUE;
4471       } else {
4472         csin_ds = PETSC_FALSE;
4473         ncoarse_ds = all_procs;
4474       }
4475     }
4476   }
4477 
4478   /*
4479     test if we can go multilevel: three conditions must be satisfied:
4480     - we have not exceeded the number of levels requested
4481     - we can actually subassemble the active processes
4482     - we can find a suitable number of MPI processes where we can place the subassembled problem
4483   */
4484   multilevel_allowed = PETSC_FALSE;
4485   multilevel_requested = PETSC_FALSE;
4486   if (pcbddc->current_level < pcbddc->max_levels) {
4487     multilevel_requested = PETSC_TRUE;
4488     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4489       multilevel_allowed = PETSC_FALSE;
4490     } else {
4491       multilevel_allowed = PETSC_TRUE;
4492     }
4493   }
4494   /* determine number of process partecipating to coarse solver */
4495   if (multilevel_allowed) {
4496     ncoarse = ncoarse_ml;
4497     csin = csin_ml;
4498     redist = PETSC_FALSE;
4499   } else {
4500     ncoarse = ncoarse_ds;
4501     csin = csin_ds;
4502   }
4503 
4504   /* creates temporary l2gmap and IS for coarse indexes */
4505   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4506   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4507 
4508   /* creates temporary MATIS object for coarse matrix */
4509   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4510   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4511   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4512   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4513 #if 0
4514   {
4515     PetscViewer viewer;
4516     char filename[256];
4517     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4518     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4519     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4520     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4521     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4522   }
4523 #endif
4524   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4525   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4526   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4527   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4528   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4529 
4530   /* compute dofs splitting and neumann boundaries for coarse dofs */
4531   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4532     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4533     const PetscInt         *idxs;
4534     ISLocalToGlobalMapping tmap;
4535 
4536     /* create map between primal indices (in local representative ordering) and local primal numbering */
4537     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4538     /* allocate space for temporary storage */
4539     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4540     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4541     /* allocate for IS array */
4542     nisdofs = pcbddc->n_ISForDofsLocal;
4543     nisneu = !!pcbddc->NeumannBoundariesLocal;
4544     nis = nisdofs + nisneu;
4545     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4546     /* dofs splitting */
4547     for (i=0;i<nisdofs;i++) {
4548       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4549       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4550       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4551       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4552       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4553       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4554       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4555       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4556     }
4557     /* neumann boundaries */
4558     if (pcbddc->NeumannBoundariesLocal) {
4559       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4560       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4561       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4562       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4563       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4564       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4565       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4566       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4567     }
4568     /* free memory */
4569     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4570     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4571     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4572   } else {
4573     nis = 0;
4574     nisdofs = 0;
4575     nisneu = 0;
4576     isarray = NULL;
4577   }
4578   /* destroy no longer needed map */
4579   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4580 
4581   /* restrict on coarse candidates (if needed) */
4582   coarse_mat_is = NULL;
4583   if (csin) {
4584     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4585       if (redist) {
4586         PetscMPIInt rank;
4587         PetscInt    spc,n_spc_p1,dest[1],destsize;
4588 
4589         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4590         spc = active_procs/ncoarse;
4591         n_spc_p1 = active_procs%ncoarse;
4592         if (im_active) {
4593           destsize = 1;
4594           if (rank > n_spc_p1*(spc+1)-1) {
4595             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4596           } else {
4597             dest[0] = rank/(spc+1);
4598           }
4599         } else {
4600           destsize = 0;
4601         }
4602         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4603       } else if (csin_type_simple) {
4604         PetscMPIInt rank;
4605         PetscInt    issize,isidx;
4606 
4607         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4608         if (im_active) {
4609           issize = 1;
4610           isidx = (PetscInt)rank;
4611         } else {
4612           issize = 0;
4613           isidx = -1;
4614         }
4615         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4616       } else { /* get a suitable subassembling pattern from MATIS code */
4617         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4618       }
4619 
4620       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4621       if (!redist || ncoarse <= void_procs) {
4622         PetscInt ncoarse_cand,tissize,*nisindices;
4623         PetscInt *coarse_candidates;
4624         const PetscInt* tisindices;
4625 
4626         /* get coarse candidates' ranks in pc communicator */
4627         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4628         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4629         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4630           if (!coarse_candidates[i]) {
4631             coarse_candidates[ncoarse_cand++]=i;
4632           }
4633         }
4634         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4635 
4636 
4637         if (pcbddc->dbg_flag) {
4638           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4639           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4640           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4641           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4642           for (i=0;i<ncoarse_cand;i++) {
4643             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4644           }
4645           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4646           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4647         }
4648         /* shift the pattern on coarse candidates */
4649         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4650         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4651         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4652         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4653         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4654         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4655         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4656       }
4657       if (pcbddc->dbg_flag) {
4658         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4659         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4660         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4661         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4662       }
4663     }
4664     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4665     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4666       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4667     } else { /* this is the last level, so use just receiving processes in subcomm */
4668       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4669     }
4670   } else {
4671     if (pcbddc->dbg_flag) {
4672       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4673       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4674       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4675     }
4676     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4677     coarse_mat_is = t_coarse_mat_is;
4678   }
4679 
4680   /* create local to global scatters for coarse problem */
4681   if (compute_vecs) {
4682     PetscInt lrows;
4683     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4684     if (coarse_mat_is) {
4685       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4686     } else {
4687       lrows = 0;
4688     }
4689     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4690     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4691     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4692     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4693     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4694   }
4695   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4696   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4697 
4698   /* set defaults for coarse KSP and PC */
4699   if (multilevel_allowed) {
4700     coarse_ksp_type = KSPRICHARDSON;
4701     coarse_pc_type = PCBDDC;
4702   } else {
4703     coarse_ksp_type = KSPPREONLY;
4704     coarse_pc_type = PCREDUNDANT;
4705   }
4706 
4707   /* print some info if requested */
4708   if (pcbddc->dbg_flag) {
4709     if (!multilevel_allowed) {
4710       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4711       if (multilevel_requested) {
4712         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4713       } else if (pcbddc->max_levels) {
4714         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4715       }
4716       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4717     }
4718   }
4719 
4720   /* create the coarse KSP object only once with defaults */
4721   if (coarse_mat_is) {
4722     MatReuse coarse_mat_reuse;
4723     PetscViewer dbg_viewer = NULL;
4724     if (pcbddc->dbg_flag) {
4725       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4726       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4727     }
4728     if (!pcbddc->coarse_ksp) {
4729       char prefix[256],str_level[16];
4730       size_t len;
4731       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4732       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4733       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4734       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4735       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4736       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4737       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4738       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4739       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4740       /* prefix */
4741       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4742       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4743       if (!pcbddc->current_level) {
4744         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4745         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4746       } else {
4747         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4748         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4749         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4750         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4751         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4752         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4753       }
4754       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4755       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4756       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4757       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4758       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4759       /* allow user customization */
4760       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4761     }
4762     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4763     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4764     if (nisdofs) {
4765       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4766       for (i=0;i<nisdofs;i++) {
4767         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4768       }
4769     }
4770     if (nisneu) {
4771       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4772       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4773     }
4774 
4775     /* get some info after set from options */
4776     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4777     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4778     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4779     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4780       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4781       isbddc = PETSC_FALSE;
4782     }
4783     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4784     if (isredundant) {
4785       KSP inner_ksp;
4786       PC  inner_pc;
4787       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4788       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4789       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4790     }
4791 
4792     /* assemble coarse matrix */
4793     if (coarse_reuse) {
4794       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4795       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4796       coarse_mat_reuse = MAT_REUSE_MATRIX;
4797     } else {
4798       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4799     }
4800     if (isbddc || isnn) {
4801       if (pcbddc->coarsening_ratio > 1) {
4802         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4803           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4804           if (pcbddc->dbg_flag) {
4805             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4806             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4807             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4808             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4809           }
4810         }
4811         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4812       } else {
4813         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4814         coarse_mat = coarse_mat_is;
4815       }
4816     } else {
4817       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4818     }
4819     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4820 
4821     /* propagate symmetry info of coarse matrix */
4822     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4823     if (pc->pmat->symmetric_set) {
4824       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4825     }
4826     if (pc->pmat->hermitian_set) {
4827       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4828     }
4829     if (pc->pmat->spd_set) {
4830       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4831     }
4832     /* set operators */
4833     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4834     if (pcbddc->dbg_flag) {
4835       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4836     }
4837   } else { /* processes non partecipating to coarse solver (if any) */
4838     coarse_mat = 0;
4839   }
4840   ierr = PetscFree(isarray);CHKERRQ(ierr);
4841 #if 0
4842   {
4843     PetscViewer viewer;
4844     char filename[256];
4845     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
4846     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
4847     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4848     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4849     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4850   }
4851 #endif
4852 
4853   /* Compute coarse null space (special handling by BDDC only) */
4854 #if 0
4855   if (pcbddc->NullSpace) {
4856     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4857   }
4858 #endif
4859 
4860   if (pcbddc->coarse_ksp) {
4861     Vec crhs,csol;
4862     PetscBool ispreonly;
4863 
4864     if (CoarseNullSpace) {
4865       if (isbddc) {
4866         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4867       } else {
4868         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4869       }
4870     }
4871     /* setup coarse ksp */
4872     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4873     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4874     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4875     /* hack */
4876     if (!csol) {
4877       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4878     }
4879     if (!crhs) {
4880       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4881     }
4882     /* Check coarse problem if in debug mode or if solving with an iterative method */
4883     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4884     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4885       KSP       check_ksp;
4886       KSPType   check_ksp_type;
4887       PC        check_pc;
4888       Vec       check_vec,coarse_vec;
4889       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4890       PetscInt  its;
4891       PetscBool compute_eigs;
4892       PetscReal *eigs_r,*eigs_c;
4893       PetscInt  neigs;
4894       const char *prefix;
4895 
4896       /* Create ksp object suitable for estimation of extreme eigenvalues */
4897       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4898       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4899       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4900       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4901       if (ispreonly) {
4902         check_ksp_type = KSPPREONLY;
4903         compute_eigs = PETSC_FALSE;
4904       } else {
4905         check_ksp_type = KSPGMRES;
4906         compute_eigs = PETSC_TRUE;
4907       }
4908       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4909       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4910       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4911       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4912       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4913       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4914       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4915       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4916       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4917       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4918       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4919       /* create random vec */
4920       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4921       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4922       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4923       if (CoarseNullSpace) {
4924         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4925       }
4926       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4927       /* solve coarse problem */
4928       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4929       if (CoarseNullSpace) {
4930         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4931       }
4932       /* set eigenvalue estimation if preonly has not been requested */
4933       if (compute_eigs) {
4934         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4935         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4936         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4937         lambda_max = eigs_r[neigs-1];
4938         lambda_min = eigs_r[0];
4939         if (pcbddc->use_coarse_estimates) {
4940           if (lambda_max>lambda_min) {
4941             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4942             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4943           }
4944         }
4945       }
4946 
4947       /* check coarse problem residual error */
4948       if (pcbddc->dbg_flag) {
4949         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4950         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4951         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4952         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4953         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4954         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4955         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4956         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4957         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4958         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4959         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4960         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4961         if (compute_eigs) {
4962           PetscReal lambda_max_s,lambda_min_s;
4963           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4964           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4965           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4966           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4967           for (i=0;i<neigs;i++) {
4968             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4969           }
4970         }
4971         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4972         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4973       }
4974       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4975       if (compute_eigs) {
4976         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4977         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4978       }
4979     }
4980   }
4981   /* print additional info */
4982   if (pcbddc->dbg_flag) {
4983     /* waits until all processes reaches this point */
4984     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4985     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4986     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4987   }
4988 
4989   /* free memory */
4990   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4991   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 #undef __FUNCT__
4996 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4997 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4998 {
4999   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5000   PC_IS*         pcis = (PC_IS*)pc->data;
5001   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5002   IS             subset,subset_mult,subset_n;
5003   PetscInt       local_size,coarse_size=0;
5004   PetscInt       *local_primal_indices=NULL;
5005   const PetscInt *t_local_primal_indices;
5006   PetscErrorCode ierr;
5007 
5008   PetscFunctionBegin;
5009   /* Compute global number of coarse dofs */
5010   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5011     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5012   }
5013   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5014   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5015   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5016   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5017   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5018   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5019   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5020   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5021   if (local_size != pcbddc->local_primal_size) {
5022     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5023   }
5024   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5025   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5026   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5027   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5028   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5029 
5030   /* check numbering */
5031   if (pcbddc->dbg_flag) {
5032     PetscScalar coarsesum,*array,*array2;
5033     PetscInt    i;
5034     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5035 
5036     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5037     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5038     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5039     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5040     /* counter */
5041     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5042     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5043     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5044     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5045     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5046     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5047 
5048     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5049     for (i=0;i<pcbddc->local_primal_size;i++) {
5050       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5051     }
5052     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5053     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5054     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5055     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5056     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5057     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5058     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5059     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5060     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5061     for (i=0;i<pcis->n;i++) {
5062       if (array[i] != 0.0 && array[i] != array2[i]) {
5063         PetscInt owned = (PetscInt)(array[i]);
5064         PetscInt neigh = (PetscInt)(array2[i]);
5065         set_error = PETSC_TRUE;
5066         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5067       }
5068     }
5069     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5070     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5071     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5072     for (i=0;i<pcis->n;i++) {
5073       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5074     }
5075     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5076     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5077     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5078     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5079     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5080     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5081     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5082       PetscInt *gidxs;
5083 
5084       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5085       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5086       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5087       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5088       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5089       for (i=0;i<pcbddc->local_primal_size;i++) {
5090         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5091       }
5092       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5093       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5094     }
5095     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5096     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5097   }
5098   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5099   /* get back data */
5100   *coarse_size_n = coarse_size;
5101   *local_primal_indices_n = local_primal_indices;
5102   PetscFunctionReturn(0);
5103 }
5104 
5105 #undef __FUNCT__
5106 #define __FUNCT__ "PCBDDCGlobalToLocal"
5107 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5108 {
5109   IS             localis_t;
5110   PetscInt       i,lsize,*idxs,n;
5111   PetscScalar    *vals;
5112   PetscErrorCode ierr;
5113 
5114   PetscFunctionBegin;
5115   /* get indices in local ordering exploiting local to global map */
5116   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5117   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5118   for (i=0;i<lsize;i++) vals[i] = 1.0;
5119   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5120   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5121   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5122   if (idxs) { /* multilevel guard */
5123     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5124   }
5125   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5126   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5127   ierr = PetscFree(vals);CHKERRQ(ierr);
5128   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5129   /* now compute set in local ordering */
5130   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5131   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5132   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5133   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5134   for (i=0,lsize=0;i<n;i++) {
5135     if (PetscRealPart(vals[i]) > 0.5) {
5136       lsize++;
5137     }
5138   }
5139   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5140   for (i=0,lsize=0;i<n;i++) {
5141     if (PetscRealPart(vals[i]) > 0.5) {
5142       idxs[lsize++] = i;
5143     }
5144   }
5145   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5146   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5147   *localis = localis_t;
5148   PetscFunctionReturn(0);
5149 }
5150 
5151 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5152 #undef __FUNCT__
5153 #define __FUNCT__ "PCBDDCMatMult_Private"
5154 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5155 {
5156   PCBDDCChange_ctx change_ctx;
5157   PetscErrorCode   ierr;
5158 
5159   PetscFunctionBegin;
5160   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5161   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5162   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5163   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5164   PetscFunctionReturn(0);
5165 }
5166 
5167 #undef __FUNCT__
5168 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5169 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5170 {
5171   PCBDDCChange_ctx change_ctx;
5172   PetscErrorCode   ierr;
5173 
5174   PetscFunctionBegin;
5175   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5176   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5177   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5178   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5179   PetscFunctionReturn(0);
5180 }
5181 
5182 #undef __FUNCT__
5183 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5184 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5185 {
5186   PC_IS               *pcis=(PC_IS*)pc->data;
5187   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5188   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5189   Mat                 S_j;
5190   PetscInt            *used_xadj,*used_adjncy;
5191   PetscBool           free_used_adj;
5192   PetscErrorCode      ierr;
5193 
5194   PetscFunctionBegin;
5195   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5196   free_used_adj = PETSC_FALSE;
5197   if (pcbddc->sub_schurs_layers == -1) {
5198     used_xadj = NULL;
5199     used_adjncy = NULL;
5200   } else {
5201     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5202       used_xadj = pcbddc->mat_graph->xadj;
5203       used_adjncy = pcbddc->mat_graph->adjncy;
5204     } else if (pcbddc->computed_rowadj) {
5205       used_xadj = pcbddc->mat_graph->xadj;
5206       used_adjncy = pcbddc->mat_graph->adjncy;
5207     } else {
5208       PetscBool      flg_row=PETSC_FALSE;
5209       const PetscInt *xadj,*adjncy;
5210       PetscInt       nvtxs;
5211 
5212       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5213       if (flg_row) {
5214         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5215         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5216         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5217         free_used_adj = PETSC_TRUE;
5218       } else {
5219         pcbddc->sub_schurs_layers = -1;
5220         used_xadj = NULL;
5221         used_adjncy = NULL;
5222       }
5223       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5224     }
5225   }
5226 
5227   /* setup sub_schurs data */
5228   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5229   if (!sub_schurs->use_mumps) {
5230     /* pcbddc->ksp_D up to date only if not using MUMPS */
5231     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5232     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5233   } else {
5234     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5235     PetscBool isseqaij;
5236     if (!pcbddc->use_vertices && reuse_solvers) {
5237       PetscInt n_vertices;
5238 
5239       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5240       reuse_solvers = (PetscBool)!n_vertices;
5241     }
5242     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5243     if (!isseqaij) {
5244       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5245       if (matis->A == pcbddc->local_mat) {
5246         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5247         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5248       } else {
5249         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5250       }
5251     }
5252     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5253   }
5254   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5255 
5256   /* free adjacency */
5257   if (free_used_adj) {
5258     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5259   }
5260   PetscFunctionReturn(0);
5261 }
5262 
5263 #undef __FUNCT__
5264 #define __FUNCT__ "PCBDDCInitSubSchurs"
5265 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5266 {
5267   PC_IS               *pcis=(PC_IS*)pc->data;
5268   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5269   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5270   PCBDDCGraph         graph;
5271   PetscErrorCode      ierr;
5272 
5273   PetscFunctionBegin;
5274   /* attach interface graph for determining subsets */
5275   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5276     IS       verticesIS,verticescomm;
5277     PetscInt vsize,*idxs;
5278 
5279     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5280     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5281     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5282     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5283     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5284     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5285     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5286     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5287     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5288     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5289     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5290 /*
5291     if (pcbddc->dbg_flag) {
5292       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5293     }
5294 */
5295   } else {
5296     graph = pcbddc->mat_graph;
5297   }
5298 
5299   /* sub_schurs init */
5300   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5301 
5302   /* free graph struct */
5303   if (pcbddc->sub_schurs_rebuild) {
5304     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5305   }
5306   PetscFunctionReturn(0);
5307 }
5308