xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision fa34dd3e0ddce463db7312028d5aec86e7c6dfc3)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignCheck"
10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PC_IS*         pcis = (PC_IS*)(pc->data);
14   PetscErrorCode ierr;
15 
16   PetscFunctionBegin;
17   if (zerodiag) {
18     Mat            A;
19     Vec            vec3_N;
20     IS             dirIS = NULL;
21     PetscScalar    *vals;
22     const PetscInt *idxs;
23     PetscInt       i,nz;
24 
25     /* p0 */
26     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
27     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
28     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
29     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
30     for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */
31     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
32     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
33     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
34     /* v_I */
35     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
36     for (i=0;i<nz;i++) vals[i] = 0.;
37     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
38     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
39     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
40     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
41     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
42     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
43     ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
44     if (dirIS) {
45       PetscInt n;
46 
47       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
48       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
49       for (i=0;i<n;i++) vals[i] = 0.;
50       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
51       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
52     }
53     ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
54     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
55     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
56     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
57     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
58     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
59     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
60     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
61     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */
62       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
63     }
64     ierr = PetscFree(vals);CHKERRQ(ierr);
65     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
66   }
67 
68   /* check PCBDDCBenignGetOrSetP0 */
69   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
70   pcbddc->benign_p0 = -PetscGlobalRank;
71   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
72   pcbddc->benign_p0 = 1;
73   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
74   if (pcbddc->benign_p0_gidx >=0 && pcbddc->benign_p0 != -PetscGlobalRank) {
75     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr);
76   }
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
83 {
84   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
85   IS             zerodiag;
86   PetscInt       nz;
87   PetscBool      sorted;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
92   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
93   pcbddc->benign_original_mat = pcbddc->local_mat;
94   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
95   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
96   if (!sorted) {
97     ierr = ISSort(zerodiag);CHKERRQ(ierr);
98   }
99   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
100   if (nz) {
101     IS                zerodiagc;
102     PetscScalar       *array;
103     const PetscInt    *idxs,*idxsc;
104     PetscInt          i,n,*nnz;
105 
106     /* TODO: add check for shared dofs and raise error */
107     ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
108     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
109     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
110     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
111     /* local change of basis for pressures */
112     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
113     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
114     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
115     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
116     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
117     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */
118     for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */
119     nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */
120     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
121     ierr = PetscFree(nnz);CHKERRQ(ierr);
122     /* set identity on velocities */
123     for (i=0;i<n-nz;i++) {
124       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
125     }
126     /* set change on pressures */
127     for (i=0;i<nz-1;i++) {
128       PetscScalar vals[2];
129       PetscInt    cols[2];
130 
131       /* TODO: add quadrature */
132       cols[0] = idxs[i];
133       cols[1] = idxs[nz-1];
134       vals[0] = 1.;
135       vals[1] = 1./nz;
136       ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
137     }
138     ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr);
139     for (i=0;i<nz-1;i++) array[i] = -1.;
140     array[nz-1] = 1./nz;
141     ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
142     ierr = PetscFree(array);CHKERRQ(ierr);
143     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
144     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
145     /* TODO: need optimization? */
146     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
147     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
148     /* store local and global idxs for p0 */
149     pcbddc->benign_p0_lidx = idxs[nz-1];
150     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
151     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
152     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
153     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
154   } else { /* this is unlikely to happen but, just in case, destroy the empty IS */
155     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
156   }
157   *zerodiaglocal = zerodiag;
158   PetscFunctionReturn(0);
159 }
160 
161 #undef __FUNCT__
162 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
163 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
164 {
165   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
166   PetscErrorCode ierr;
167 
168   PetscFunctionBegin;
169   if (!pcbddc->benign_sf) {
170     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
171     if (pcbddc->benign_p0_gidx >= 0) {
172       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
173     } else {
174       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
175     }
176   }
177   if (get) { /* use SF to get values */
178     PetscScalar *array;
179 
180     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
181     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
182     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
183     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
184   } else { /* use VecSetValue */
185     if (pcbddc->benign_p0_gidx >= 0) {
186       ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
187     }
188     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
189     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
190   }
191   PetscFunctionReturn(0);
192 }
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
196 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
197 {
198   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
199   PetscErrorCode ierr;
200 
201   PetscFunctionBegin;
202   /* TODO: add error checking
203     - avoid nested pop (or push) calls.
204     - cannot push before pop.
205     - cannot call this if pcbddc->local_mat is NULL
206   */
207   if (pcbddc->benign_p0_lidx < 0) {
208     PetscFunctionReturn(0);
209   }
210   if (pop) {
211     const PetscInt    *cB0_cols;
212     PetscInt          cB0_ncol;
213     const PetscScalar *cB0_vals;
214 
215     /* extract B_0 */
216     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
217     pcbddc->B0_ncol = cB0_ncol;
218     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
219     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
220     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
221     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
222     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
223     /* remove rows and cols from local problem */
224     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
225     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
226   } else { /* push */
227     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
228     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
229     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
230     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
231     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "PCBDDCAdaptiveSelection"
238 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
239 {
240   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
241   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
242   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
243   PetscBLASInt    *B_iwork,*B_ifail;
244   PetscScalar     *work,lwork;
245   PetscScalar     *St,*S,*eigv;
246   PetscScalar     *Sarray,*Starray;
247   PetscReal       *eigs,thresh;
248   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
249   PetscBool       allocated_S_St;
250 #if defined(PETSC_USE_COMPLEX)
251   PetscReal       *rwork;
252 #endif
253   PetscErrorCode  ierr;
254 
255   PetscFunctionBegin;
256   if (!sub_schurs->use_mumps) {
257     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
258   }
259 
260   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
261     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
262   }
263 
264   if (pcbddc->dbg_flag) {
265     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
266     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
267     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
268     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
269   }
270 
271   if (pcbddc->dbg_flag) {
272     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
273   }
274 
275   /* max size of subsets */
276   mss = 0;
277   for (i=0;i<sub_schurs->n_subs;i++) {
278     PetscInt subset_size;
279 
280     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
281     mss = PetscMax(mss,subset_size);
282   }
283 
284   /* min/max and threshold */
285   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
286   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
287   nmax = PetscMax(nmin,nmax);
288   allocated_S_St = PETSC_FALSE;
289   if (nmin) {
290     allocated_S_St = PETSC_TRUE;
291   }
292 
293   /* allocate lapack workspace */
294   cum = cum2 = 0;
295   maxneigs = 0;
296   for (i=0;i<sub_schurs->n_subs;i++) {
297     PetscInt n,subset_size;
298 
299     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
300     n = PetscMin(subset_size,nmax);
301     cum += subset_size;
302     cum2 += subset_size*n;
303     maxneigs = PetscMax(maxneigs,n);
304   }
305   if (mss) {
306     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
307       PetscBLASInt B_itype = 1;
308       PetscBLASInt B_N = mss;
309       PetscReal    zero = 0.0;
310       PetscReal    eps = 0.0; /* dlamch? */
311 
312       B_lwork = -1;
313       S = NULL;
314       St = NULL;
315       eigs = NULL;
316       eigv = NULL;
317       B_iwork = NULL;
318       B_ifail = NULL;
319 #if defined(PETSC_USE_COMPLEX)
320       rwork = NULL;
321 #endif
322       thresh = 1.0;
323       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
324 #if defined(PETSC_USE_COMPLEX)
325       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
326 #else
327       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
328 #endif
329       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
330       ierr = PetscFPTrapPop();CHKERRQ(ierr);
331     } else {
332         /* TODO */
333     }
334   } else {
335     lwork = 0;
336   }
337 
338   nv = 0;
339   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
340     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
341   }
342   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
343   if (allocated_S_St) {
344     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
345   }
346   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
347 #if defined(PETSC_USE_COMPLEX)
348   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
349 #endif
350   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
351                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
352                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
353                       nv+cum,&pcbddc->adaptive_constraints_idxs,
354                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
355   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
356 
357   maxneigs = 0;
358   cum = cum2 = cumarray = 0;
359   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
360   pcbddc->adaptive_constraints_data_ptr[0] = 0;
361   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
362     const PetscInt *idxs;
363 
364     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
365     for (cum=0;cum<nv;cum++) {
366       pcbddc->adaptive_constraints_n[cum] = 1;
367       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
368       pcbddc->adaptive_constraints_data[cum] = 1.0;
369       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
370       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
371     }
372     cum2 = cum;
373     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
374   }
375 
376   if (mss) { /* multilevel */
377     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
378     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
379   }
380 
381   for (i=0;i<sub_schurs->n_subs;i++) {
382 
383     const PetscInt *idxs;
384     PetscReal      infty = PETSC_MAX_REAL;
385     PetscInt       j,subset_size,eigs_start = 0;
386     PetscBLASInt   B_N;
387     PetscBool      same_data = PETSC_FALSE;
388 
389     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
390     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
391     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
392       if (sub_schurs->is_hermitian) {
393         PetscInt j,k;
394         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
395           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
396           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
397         }
398         for (j=0;j<subset_size;j++) {
399           for (k=j;k<subset_size;k++) {
400             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
401             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
402           }
403         }
404       } else {
405         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
406         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
407       }
408     } else {
409       S = Sarray + cumarray;
410       St = Starray + cumarray;
411     }
412 
413     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
414     /* see if we can save some work */
415     if (sub_schurs->n_subs == 1) {
416       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
417     }
418 
419     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
420       B_neigs = 0;
421     } else {
422       /* Threshold: this is an heuristic for edges */
423       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
424 
425       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
426         PetscBLASInt B_itype = 1;
427         PetscBLASInt B_IL, B_IU;
428         PetscReal    eps = -1.0; /* dlamch? */
429         PetscInt     nmin_s;
430 
431         if (pcbddc->dbg_flag) {
432           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
433         }
434 
435         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
436         if (thresh > 1.+PETSC_SMALL) {
437 
438           /* ask for eigenvalues larger than thresh */
439 #if defined(PETSC_USE_COMPLEX)
440           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
441 #else
442           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
443 #endif
444         } else {
445           B_IU = PetscMax(1,PetscMin(B_N,nmax));
446           B_IL = 1;
447 #if defined(PETSC_USE_COMPLEX)
448           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
449 #else
450           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
451 #endif
452         }
453         ierr = PetscFPTrapPop();CHKERRQ(ierr);
454         if (B_ierr) {
455           if (B_ierr < 0 ) {
456             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
457           } else if (B_ierr <= B_N) {
458             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
459           } else {
460             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
461           }
462         }
463 
464         if (B_neigs > nmax) {
465           if (pcbddc->dbg_flag) {
466             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
467           }
468           eigs_start = B_neigs -nmax;
469           B_neigs = nmax;
470         }
471 
472         nmin_s = PetscMin(nmin,B_N);
473         if (B_neigs < nmin_s) {
474           PetscBLASInt B_neigs2;
475 
476           B_IU = B_N - B_neigs;
477           B_IL = B_N - nmin_s + 1;
478           if (pcbddc->dbg_flag) {
479             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
480           }
481           if (sub_schurs->is_hermitian) {
482             PetscInt j;
483             for (j=0;j<subset_size;j++) {
484               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
485             }
486             for (j=0;j<subset_size;j++) {
487               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
488             }
489           } else {
490             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
491             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
492           }
493           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
494 #if defined(PETSC_USE_COMPLEX)
495           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
496 #else
497           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
498 #endif
499           ierr = PetscFPTrapPop();CHKERRQ(ierr);
500           B_neigs += B_neigs2;
501         }
502         if (B_ierr) {
503           if (B_ierr < 0 ) {
504             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
505           } else if (B_ierr <= B_N) {
506             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
507           } else {
508             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
509           }
510         }
511         if (pcbddc->dbg_flag) {
512           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
513           for (j=0;j<B_neigs;j++) {
514             if (eigs[j] == 0.0) {
515               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
516             } else {
517               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
518             }
519           }
520         }
521       } else {
522           /* TODO */
523       }
524     }
525     maxneigs = PetscMax(B_neigs,maxneigs);
526     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
527     if (B_neigs) {
528       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
529 
530       if (pcbddc->dbg_flag > 1) {
531         PetscInt ii;
532         for (ii=0;ii<B_neigs;ii++) {
533           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
534           for (j=0;j<B_N;j++) {
535 #if defined(PETSC_USE_COMPLEX)
536             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
537             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
538             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
539 #else
540             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
541 #endif
542           }
543         }
544       }
545 #if 0
546       for (j=0;j<B_neigs;j++) {
547         PetscBLASInt Blas_N,Blas_one = 1.0;
548         PetscScalar norm;
549         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
550         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
551                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
552         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
553           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
554         } else {
555           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
556         }
557         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
558       }
559 #endif
560       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
561       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
562       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
563       cum++;
564     }
565     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
566     /* shift for next computation */
567     cumarray += subset_size*subset_size;
568   }
569   if (pcbddc->dbg_flag) {
570     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
571   }
572 
573   if (mss) {
574     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
575     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
576     /* destroy matrices (junk) */
577     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
578     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
579   }
580   if (allocated_S_St) {
581     ierr = PetscFree2(S,St);CHKERRQ(ierr);
582   }
583   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
584 #if defined(PETSC_USE_COMPLEX)
585   ierr = PetscFree(rwork);CHKERRQ(ierr);
586 #endif
587   if (pcbddc->dbg_flag) {
588     PetscInt maxneigs_r;
589     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
590     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
591   }
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "PCBDDCSetUpSolvers"
597 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
598 {
599   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
600   PetscScalar    *coarse_submat_vals;
601   PetscErrorCode ierr;
602 
603   PetscFunctionBegin;
604   /* Setup local scatters R_to_B and (optionally) R_to_D */
605   /* PCBDDCSetUpLocalWorkVectors should be called first! */
606   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
607 
608   /* Setup local neumann solver ksp_R */
609   /* PCBDDCSetUpLocalScatters should be called first! */
610   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
611 
612   /* Change global null space passed in by the user if change of basis has been requested */
613   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
614     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
615   }
616 
617   /*
618      Setup local correction and local part of coarse basis.
619      Gives back the dense local part of the coarse matrix in column major ordering
620   */
621   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
622 
623   /* Compute total number of coarse nodes and setup coarse solver */
624   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
625 
626   /* free */
627   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "PCBDDCResetCustomization"
633 PetscErrorCode PCBDDCResetCustomization(PC pc)
634 {
635   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
636   PetscErrorCode ierr;
637 
638   PetscFunctionBegin;
639   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
640   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
641   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
642   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
643   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
644   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
645   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
646   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
647   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
648   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
649   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "PCBDDCResetTopography"
655 PetscErrorCode PCBDDCResetTopography(PC pc)
656 {
657   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
658   PetscErrorCode ierr;
659 
660   PetscFunctionBegin;
661   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
662   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
663   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
664   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
665   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "PCBDDCResetSolvers"
671 PetscErrorCode PCBDDCResetSolvers(PC pc)
672 {
673   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
674   PetscScalar    *array;
675   PetscErrorCode ierr;
676 
677   PetscFunctionBegin;
678   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
679   if (pcbddc->coarse_phi_B) {
680     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
681     ierr = PetscFree(array);CHKERRQ(ierr);
682   }
683   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
684   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
685   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
686   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
687   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
688   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
689   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
690   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
691   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
692   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
693   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
694   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
695   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
696   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
697   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
698   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
699   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
700   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
701   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
702   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
703   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
704   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
705   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
706   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
707   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
708   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
709   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
710   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
716 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
717 {
718   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
719   PC_IS          *pcis = (PC_IS*)pc->data;
720   VecType        impVecType;
721   PetscInt       n_constraints,n_R,old_size,n_benign;
722   PetscErrorCode ierr;
723 
724   PetscFunctionBegin;
725   if (!pcbddc->ConstraintMatrix) {
726     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
727   }
728   /* get sizes */
729   n_benign = 0;
730   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
731   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
732   n_R = pcis->n - pcbddc->n_vertices;
733   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
734   /* local work vectors (try to avoid unneeded work)*/
735   /* R nodes */
736   old_size = -1;
737   if (pcbddc->vec1_R) {
738     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
739   }
740   if (n_R != old_size) {
741     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
742     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
743     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
744     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
745     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
746     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
747   }
748   /* local primal dofs */
749   old_size = -1;
750   if (pcbddc->vec1_P) {
751     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
752   }
753   if (pcbddc->local_primal_size != old_size) {
754     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
755     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
756     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
757     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
758   }
759   /* local explicit constraints */
760   old_size = -1;
761   if (pcbddc->vec1_C) {
762     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
763   }
764   if (n_constraints && n_constraints != old_size) {
765     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
766     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
767     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
768     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
769   }
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "PCBDDCSetUpCorrection"
775 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
776 {
777   PetscErrorCode  ierr;
778   /* pointers to pcis and pcbddc */
779   PC_IS*          pcis = (PC_IS*)pc->data;
780   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
781   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
782   /* submatrices of local problem */
783   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
784   /* submatrices of benign trick */
785   Mat             B0_V = NULL;
786   /* submatrices of local coarse problem */
787   Mat             S_VV,S_CV,S_VC,S_CC;
788   /* working matrices */
789   Mat             C_CR;
790   /* additional working stuff */
791   PC              pc_R;
792   Mat             F,B0 = NULL;
793   PetscBool       isLU,isCHOL,isILU;
794 
795   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
796   PetscScalar     *work;
797   PetscInt        *idx_V_B;
798   PetscInt        n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0;
799   PetscInt        i,n_R,n_D,n_B;
800   PetscBool       unsymmetric_check;
801   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
802   MatType         impMatType;
803   /* some shortcuts to scalars */
804   PetscScalar     one=1.0,m_one=-1.0;
805 
806   PetscFunctionBegin;
807   n_benign = 0;
808   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
809   n_vertices = pcbddc->n_vertices;
810   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
811   /* Set Non-overlapping dimensions */
812   n_B = pcis->n_B;
813   n_D = pcis->n - n_B;
814   n_R = pcis->n - n_vertices;
815 
816   /* Set types for local objects needed by BDDC precondtioner */
817   impMatType = MATSEQDENSE;
818 
819   /* vertices in boundary numbering */
820   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
821   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
822   if (i != n_vertices) {
823     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
824   }
825 
826   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
827   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
828   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
829   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
830   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
831   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
832   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
833   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
834   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
835   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
836 
837   unsymmetric_check = PETSC_FALSE;
838   /* allocate workspace */
839   n = 0;
840   if (n_constraints) {
841     n += n_R*n_constraints;
842   }
843   if (n_vertices) {
844     n = PetscMax(2*n_R*n_vertices,n);
845     n = PetscMax((n_R+n_B)*n_vertices,n);
846   }
847   if (!pcbddc->symmetric_primal) {
848     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
849     unsymmetric_check = PETSC_TRUE;
850   }
851   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
852 
853   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
854   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
855   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
856   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
857   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
858   if (isLU || isILU || isCHOL) {
859     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
860   } else if (sub_schurs->reuse_mumps) {
861     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
862     MatFactorType type;
863 
864     F = reuse_mumps->F;
865     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
866     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
867   } else {
868     F = NULL;
869   }
870 
871   /* Precompute stuffs needed for preprocessing and application of BDDC*/
872   if (n_constraints) {
873     Mat         M1,M2,M3;
874     Mat         auxmat;
875     IS          is_aux;
876     PetscScalar *array,*array2;
877 
878     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
879     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
880 
881     /* Extract constraints on R nodes: C_{CR}  */
882     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
883     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
884     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
885 
886     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
887     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
888     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
889     for (i=0;i<n_constraints;i++) {
890       const PetscScalar *row_cmat_values;
891       const PetscInt    *row_cmat_indices;
892       PetscInt          size_of_constraint,j;
893 
894       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
895       for (j=0;j<size_of_constraint;j++) {
896         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
897       }
898       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
899     }
900     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
901     if (F) {
902       Mat B;
903 
904       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
905       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
906       ierr = MatDestroy(&B);CHKERRQ(ierr);
907     } else {
908       PetscScalar *marr;
909 
910       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
911       for (i=0;i<n_constraints;i++) {
912         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
913         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
914         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
915         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
916         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
917       }
918       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
919     }
920     if (!pcbddc->switch_static) {
921       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
922       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
923       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
924       for (i=0;i<n_constraints;i++) {
925         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
926         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
927         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
930         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
931       }
932       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
933       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
934       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
935     } else {
936       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
937       pcbddc->local_auxmat2 = local_auxmat2_R;
938       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
939     }
940     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
941     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
942     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
943     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
944     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
945     if (isCHOL) {
946       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
947     } else {
948       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
949     }
950     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
951     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
952     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
953     ierr = MatDestroy(&M2);CHKERRQ(ierr);
954     ierr = MatDestroy(&M3);CHKERRQ(ierr);
955     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
956     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
957     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
958     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
959     ierr = MatDestroy(&M1);CHKERRQ(ierr);
960   }
961   /* Get submatrices from subdomain matrix */
962   if (n_benign) {
963     IS        dummy;
964     Mat       B0_R;
965     PetscReal norm;
966     PetscInt  ii[2];
967 
968     ii[0] = 0;
969     ii[1] = pcbddc->B0_ncol;
970     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
971     ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
972     ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
973     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
974     if (norm > PETSC_SMALL) {
975       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
976     }
977     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
978     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
979   }
980 
981   if (n_vertices) {
982     IS is_aux;
983 
984     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
985       IS tis;
986 
987       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
988       ierr = ISSort(tis);CHKERRQ(ierr);
989       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
990       ierr = ISDestroy(&tis);CHKERRQ(ierr);
991     } else {
992       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
993     }
994     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
995     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
996     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
997     if (n_benign) {
998       IS dummy;
999 
1000       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
1001       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1002       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1003     }
1004     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1005   }
1006 
1007   /* Matrix of coarse basis functions (local) */
1008   if (pcbddc->coarse_phi_B) {
1009     PetscInt on_B,on_primal,on_D=n_D;
1010     if (pcbddc->coarse_phi_D) {
1011       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1012     }
1013     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1014     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1015       PetscScalar *marray;
1016 
1017       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1018       ierr = PetscFree(marray);CHKERRQ(ierr);
1019       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1020       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1021       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1022       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1023     }
1024   }
1025 
1026   if (!pcbddc->coarse_phi_B) {
1027     PetscScalar *marray;
1028 
1029     n = n_B*pcbddc->local_primal_size;
1030     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1031       n += n_D*pcbddc->local_primal_size;
1032     }
1033     if (!pcbddc->symmetric_primal) {
1034       n *= 2;
1035     }
1036     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1037     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1038     n = n_B*pcbddc->local_primal_size;
1039     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1040       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1041       n += n_D*pcbddc->local_primal_size;
1042     }
1043     if (!pcbddc->symmetric_primal) {
1044       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1045       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1046         n = n_B*pcbddc->local_primal_size;
1047         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1048       }
1049     } else {
1050       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1051       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1052       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1053         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1054         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1055       }
1056     }
1057   }
1058 
1059   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1060   if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1061     const PetscInt *idxs;
1062 
1063     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1064     ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr);
1065     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1066   }
1067 
1068   /* vertices */
1069   if (n_vertices) {
1070 
1071     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1072 
1073     if (n_R) {
1074       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1075       PetscBLASInt B_N,B_one = 1;
1076       PetscScalar  *x,*y;
1077       PetscBool    isseqaij;
1078 
1079       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1080       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1081       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1082       if (F) { /* TODO could be optimized for symmetric problems */
1083         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1084       } else {
1085         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1086         for (i=0;i<n_vertices;i++) {
1087           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1088           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1089           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1090           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1091           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1092         }
1093         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1094       }
1095       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1096       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1097       if (n_constraints) {
1098         Mat B;
1099 
1100         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1101         for (i=0;i<n_vertices;i++) {
1102           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1103           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1104           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1106           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1107           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1108         }
1109         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1110         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1111         ierr = MatDestroy(&B);CHKERRQ(ierr);
1112         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1113         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1114         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1115         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1116         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1117         ierr = MatDestroy(&B);CHKERRQ(ierr);
1118       }
1119       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1120       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1121         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1122       }
1123       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1124       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1125       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1126       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1127       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1128       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1129       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1130       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1131       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1132       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1133     } else {
1134       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1135     }
1136     if (n_benign) {
1137       const PetscScalar *vals;
1138       const PetscInt    *idxs;
1139       PetscInt          n,j;
1140 
1141       ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1142       for (j=0;j<n;j++) {
1143         coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j];
1144         coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j];
1145       }
1146       ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1147     }
1148     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1149 
1150     /* coarse basis functions */
1151     for (i=0;i<n_vertices;i++) {
1152       PetscScalar *y;
1153 
1154       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1155       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1156       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1157       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1158       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159       y[n_B*i+idx_V_B[i]] = 1.0;
1160       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1161       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1162 
1163       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1164         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1165         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1166         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1168         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1169         if (n_benign) y[n_D*i+p0_lidx_I] = 0.0;
1170         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1171       }
1172       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1173     }
1174     /* if n_R == 0 the object is not destroyed */
1175     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1176   }
1177 
1178   if (n_constraints) {
1179     Mat B;
1180 
1181     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1182     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1183     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1184     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1185     if (n_vertices) {
1186       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1187         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1188       } else {
1189         Mat S_VCt;
1190 
1191         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1192         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1193         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1194       }
1195     }
1196     ierr = MatDestroy(&B);CHKERRQ(ierr);
1197     /* coarse basis functions */
1198     for (i=0;i<n_constraints;i++) {
1199       PetscScalar *y;
1200 
1201       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1202       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1203       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1204       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1207       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1208       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1209         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1210         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1211         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1214         if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0;
1215         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1216       }
1217       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1218     }
1219   }
1220   if (n_constraints) {
1221     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1222   }
1223 
1224   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1225 
1226   /* compute other basis functions for non-symmetric problems */
1227   if (!pcbddc->symmetric_primal) {
1228 
1229     if (n_constraints) {
1230       Mat S_CCT,B_C;
1231 
1232       /* this is a lazy thing */
1233       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1234       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1235       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1236       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1237       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1238       if (n_vertices) {
1239         Mat B_V,S_VCT;
1240 
1241         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1242         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1243         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1244         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1245         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1246       }
1247       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1248     } else { /* if there are no constraints, reset work */
1249       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1250     }
1251     if (n_vertices && n_R) {
1252       Mat          A_VRT;
1253       PetscScalar  *marray;
1254       PetscBLASInt B_N,B_one = 1;
1255 
1256       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1257       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1258       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1259       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1260       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1261       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1262       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1263     }
1264 
1265     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1266       for (i=0;i<pcbddc->local_primal_size;i++) {
1267         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1268         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1269         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1270         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1271         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1272       }
1273     } else {
1274       for (i=0;i<pcbddc->local_primal_size;i++) {
1275         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1276         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1277         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1278         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1279         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1280       }
1281     }
1282     /* coarse basis functions */
1283     for (i=0;i<pcbddc->local_primal_size;i++) {
1284       PetscScalar *y;
1285 
1286       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1287       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1288       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1289       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1290       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1291       if (i<n_vertices) {
1292         y[n_B*i+idx_V_B[i]] = 1.0;
1293       }
1294       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1295       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1296 
1297       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1298         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1299         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1300         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1301         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1303         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1304       }
1305       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1306     }
1307   }
1308   /* free memory */
1309   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1310   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1311   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1312   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1313   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1314   ierr = PetscFree(work);CHKERRQ(ierr);
1315   if (n_vertices) {
1316     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1317   }
1318   if (n_constraints) {
1319     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1320   }
1321   /* Checking coarse_sub_mat and coarse basis functios */
1322   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1323   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1324   if (pcbddc->dbg_flag) {
1325     Mat         coarse_sub_mat;
1326     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1327     Mat         coarse_phi_D,coarse_phi_B;
1328     Mat         coarse_psi_D,coarse_psi_B;
1329     Mat         A_II,A_BB,A_IB,A_BI;
1330     Mat         C_B,CPHI;
1331     IS          is_dummy;
1332     Vec         mones;
1333     MatType     checkmattype=MATSEQAIJ;
1334     PetscReal   real_value;
1335 
1336     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1337     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1338     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1339     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1340     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1341     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1342     if (unsymmetric_check) {
1343       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1344       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1345     }
1346     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1347 
1348     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1349     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1350     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1351     if (unsymmetric_check) {
1352       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1353       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1354       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1355       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1356       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1357       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1358       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1359       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1360       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1361       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1362       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1363       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1364     } else {
1365       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1366       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1367       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1368       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1369       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1370       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1371       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1372       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1373     }
1374     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1375     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1376     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1377     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1378     if (n_benign) {
1379       Mat B0_I,B0_B,B0_BPHI,B0_IPHI;
1380       PetscScalar *data,*data2;
1381 
1382       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr);
1383       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1384       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1385       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1386       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1387       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1388       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1389       for (i=0;i<pcbddc->local_primal_size;i++) {
1390         data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i];
1391         data[(i+1)*pcbddc->local_primal_size-1] += data2[i];
1392       }
1393       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1394       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1395       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1396       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1397       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1398       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1399       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1400       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1401       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1402     }
1403 #if 0
1404   {
1405     PetscViewer viewer;
1406     char filename[256];
1407     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1408     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1409     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1410     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1411     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1412   }
1413 #endif
1414     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1415     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1416     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1417     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1418 
1419     /* check constraints */
1420     if (!n_benign) {
1421       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1422       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1423       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1424       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1425       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1426       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1427       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1428       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1429       if (unsymmetric_check) {
1430         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1431         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1432         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1433         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1434         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1435       }
1436       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1437       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1438       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1439       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1440     }
1441     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1442     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1443     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1444     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1445     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1446     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1447     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1448     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1449     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1450     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1451     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1452     if (unsymmetric_check) {
1453       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1454       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1455     }
1456     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1457   }
1458   ierr = MatDestroy(&B0);CHKERRQ(ierr);
1459   /* get back data */
1460   *coarse_submat_vals_n = coarse_submat_vals;
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1466 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1467 {
1468   Mat            *work_mat;
1469   IS             isrow_s,iscol_s;
1470   PetscBool      rsorted,csorted;
1471   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1472   PetscErrorCode ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1476   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1477   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1478   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1479 
1480   if (!rsorted) {
1481     const PetscInt *idxs;
1482     PetscInt *idxs_sorted,i;
1483 
1484     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1485     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1486     for (i=0;i<rsize;i++) {
1487       idxs_perm_r[i] = i;
1488     }
1489     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1490     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1491     for (i=0;i<rsize;i++) {
1492       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1493     }
1494     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1495     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1496   } else {
1497     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1498     isrow_s = isrow;
1499   }
1500 
1501   if (!csorted) {
1502     if (isrow == iscol) {
1503       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1504       iscol_s = isrow_s;
1505     } else {
1506       const PetscInt *idxs;
1507       PetscInt *idxs_sorted,i;
1508 
1509       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1510       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1511       for (i=0;i<csize;i++) {
1512         idxs_perm_c[i] = i;
1513       }
1514       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1515       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1516       for (i=0;i<csize;i++) {
1517         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1518       }
1519       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1520       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1521     }
1522   } else {
1523     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1524     iscol_s = iscol;
1525   }
1526 
1527   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1528 
1529   if (!rsorted || !csorted) {
1530     Mat      new_mat;
1531     IS       is_perm_r,is_perm_c;
1532 
1533     if (!rsorted) {
1534       PetscInt *idxs_r,i;
1535       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1536       for (i=0;i<rsize;i++) {
1537         idxs_r[idxs_perm_r[i]] = i;
1538       }
1539       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1540       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1541     } else {
1542       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1543     }
1544     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1545 
1546     if (!csorted) {
1547       if (isrow_s == iscol_s) {
1548         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1549         is_perm_c = is_perm_r;
1550       } else {
1551         PetscInt *idxs_c,i;
1552         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1553         for (i=0;i<csize;i++) {
1554           idxs_c[idxs_perm_c[i]] = i;
1555         }
1556         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1557         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1558       }
1559     } else {
1560       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1561     }
1562     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1563 
1564     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1565     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1566     work_mat[0] = new_mat;
1567     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1568     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1569   }
1570 
1571   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1572   *B = work_mat[0];
1573   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1574   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1575   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1581 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1582 {
1583   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1584   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1585   Mat            new_mat;
1586   IS             is_local,is_global;
1587   PetscInt       local_size;
1588   PetscBool      isseqaij;
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1593   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1594   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1595   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1596   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1597   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1598   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1599 
1600   /* check */
1601   if (pcbddc->dbg_flag) {
1602     Vec       x,x_change;
1603     PetscReal error;
1604 
1605     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1606     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1607     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1608     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1611     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1612     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1613     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1614     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1615     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1616     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1617     ierr = VecDestroy(&x);CHKERRQ(ierr);
1618     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1619   }
1620 
1621   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1622   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1623   if (isseqaij) {
1624     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1625   } else {
1626     Mat work_mat;
1627     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1628     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1629     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1630   }
1631   if (matis->A->symmetric_set) {
1632     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1633 #if !defined(PETSC_USE_COMPLEX)
1634     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1635 #endif
1636   }
1637   /*
1638   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1639   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1640   */
1641   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 #undef __FUNCT__
1646 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1647 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1648 {
1649   PC_IS*          pcis = (PC_IS*)(pc->data);
1650   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1651   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1652   PetscInt        *idx_R_local=NULL;
1653   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1654   PetscInt        vbs,bs;
1655   PetscBT         bitmask=NULL;
1656   PetscErrorCode  ierr;
1657 
1658   PetscFunctionBegin;
1659   /*
1660     No need to setup local scatters if
1661       - primal space is unchanged
1662         AND
1663       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1664         AND
1665       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1666   */
1667   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1668     PetscFunctionReturn(0);
1669   }
1670   /* destroy old objects */
1671   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1672   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1673   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1674   /* Set Non-overlapping dimensions */
1675   n_B = pcis->n_B;
1676   n_D = pcis->n - n_B;
1677   n_vertices = pcbddc->n_vertices;
1678 
1679   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1680 
1681   /* create auxiliary bitmask and allocate workspace */
1682   if (!sub_schurs->reuse_mumps) {
1683     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1684     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1685     for (i=0;i<n_vertices;i++) {
1686       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1687     }
1688 
1689     for (i=0, n_R=0; i<pcis->n; i++) {
1690       if (!PetscBTLookup(bitmask,i)) {
1691         idx_R_local[n_R++] = i;
1692       }
1693     }
1694   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1695     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1696 
1697     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1698     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1699   }
1700 
1701   /* Block code */
1702   vbs = 1;
1703   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1704   if (bs>1 && !(n_vertices%bs)) {
1705     PetscBool is_blocked = PETSC_TRUE;
1706     PetscInt  *vary;
1707     if (!sub_schurs->reuse_mumps) {
1708       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1709       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1710       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1711       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1712       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1713       for (i=0; i<pcis->n/bs; i++) {
1714         if (vary[i]!=0 && vary[i]!=bs) {
1715           is_blocked = PETSC_FALSE;
1716           break;
1717         }
1718       }
1719       ierr = PetscFree(vary);CHKERRQ(ierr);
1720     } else {
1721       /* Verify directly the R set */
1722       for (i=0; i<n_R/bs; i++) {
1723         PetscInt j,node=idx_R_local[bs*i];
1724         for (j=1; j<bs; j++) {
1725           if (node != idx_R_local[bs*i+j]-j) {
1726             is_blocked = PETSC_FALSE;
1727             break;
1728           }
1729         }
1730       }
1731     }
1732     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1733       vbs = bs;
1734       for (i=0;i<n_R/vbs;i++) {
1735         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1736       }
1737     }
1738   }
1739   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1740   if (sub_schurs->reuse_mumps) {
1741     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1742 
1743     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1744     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1745     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1746     reuse_mumps->is_R = pcbddc->is_R_local;
1747   } else {
1748     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1749   }
1750 
1751   /* print some info if requested */
1752   if (pcbddc->dbg_flag) {
1753     PetscInt benign = 0;
1754 
1755     if (pcbddc->benign_p0_lidx >= 0) benign = 1;
1756     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1757     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1758     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1759     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1760     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1761     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1762     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1763   }
1764 
1765   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1766   if (!sub_schurs->reuse_mumps) {
1767     IS       is_aux1,is_aux2;
1768     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1769 
1770     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1771     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1772     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1773     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1774     for (i=0; i<n_D; i++) {
1775       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1776     }
1777     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1778     for (i=0, j=0; i<n_R; i++) {
1779       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1780         aux_array1[j++] = i;
1781       }
1782     }
1783     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1784     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1785     for (i=0, j=0; i<n_B; i++) {
1786       if (!PetscBTLookup(bitmask,is_indices[i])) {
1787         aux_array2[j++] = i;
1788       }
1789     }
1790     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1791     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1792     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1793     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1794     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1795 
1796     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1797       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1798       for (i=0, j=0; i<n_R; i++) {
1799         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1800           aux_array1[j++] = i;
1801         }
1802       }
1803       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1804       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1805       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1806     }
1807     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1808     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1809   } else {
1810     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1811     IS               tis;
1812     PetscInt         schur_size;
1813 
1814     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1815     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1816     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1817     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1818     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1819       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1820       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1821       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1822     }
1823   }
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1830 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1831 {
1832   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1833   PC_IS          *pcis = (PC_IS*)pc->data;
1834   PC             pc_temp;
1835   Mat            A_RR;
1836   MatReuse       reuse;
1837   PetscScalar    m_one = -1.0;
1838   PetscReal      value;
1839   PetscInt       n_D,n_R;
1840   PetscBool      use_exact,use_exact_reduced,issbaij;
1841   PetscErrorCode ierr;
1842   /* prefixes stuff */
1843   char           dir_prefix[256],neu_prefix[256],str_level[16];
1844   size_t         len;
1845 
1846   PetscFunctionBegin;
1847 
1848   /* compute prefixes */
1849   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1850   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1851   if (!pcbddc->current_level) {
1852     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1853     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1854     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1855     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1856   } else {
1857     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1858     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1859     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1860     len -= 15; /* remove "pc_bddc_coarse_" */
1861     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1862     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1863     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1864     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1865     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1866     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1867     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1868     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1869   }
1870 
1871   /* DIRICHLET PROBLEM */
1872   if (dirichlet) {
1873     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1874     if (pcbddc->local_mat->symmetric_set) {
1875       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1876     }
1877     /* Matrix for Dirichlet problem is pcis->A_II */
1878     n_D = pcis->n - pcis->n_B;
1879     if (!pcbddc->ksp_D) { /* create object if not yet build */
1880       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1881       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1882       /* default */
1883       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1884       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1885       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1886       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1887       if (issbaij) {
1888         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1889       } else {
1890         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1891       }
1892       /* Allow user's customization */
1893       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1894       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1895     }
1896     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1897     if (sub_schurs->reuse_mumps) {
1898       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1899 
1900       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1901     }
1902     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1903     if (!n_D) {
1904       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1905       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1906     }
1907     /* Set Up KSP for Dirichlet problem of BDDC */
1908     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1909     /* set ksp_D into pcis data */
1910     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1911     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1912     pcis->ksp_D = pcbddc->ksp_D;
1913   }
1914 
1915   /* NEUMANN PROBLEM */
1916   A_RR = 0;
1917   if (neumann) {
1918     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1919     PetscInt        ibs,mbs;
1920     PetscBool       issbaij;
1921     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1922     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1923     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1924     if (pcbddc->ksp_R) { /* already created ksp */
1925       PetscInt nn_R;
1926       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1927       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1928       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1929       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1930         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1931         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1932         reuse = MAT_INITIAL_MATRIX;
1933       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1934         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1935           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1936           reuse = MAT_INITIAL_MATRIX;
1937         } else { /* safe to reuse the matrix */
1938           reuse = MAT_REUSE_MATRIX;
1939         }
1940       }
1941       /* last check */
1942       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1943         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1944         reuse = MAT_INITIAL_MATRIX;
1945       }
1946     } else { /* first time, so we need to create the matrix */
1947       reuse = MAT_INITIAL_MATRIX;
1948     }
1949     /* extract A_RR */
1950     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1951     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1952     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1953     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1954       if (matis->A == pcbddc->local_mat) {
1955         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1956         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1957       } else {
1958         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1959       }
1960     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1961       if (matis->A == pcbddc->local_mat) {
1962         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1963         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1964       } else {
1965         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1966       }
1967     }
1968     if (!sub_schurs->reuse_mumps) {
1969       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1970       if (pcbddc->local_mat->symmetric_set) {
1971         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1972       }
1973     } else {
1974       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1975 
1976       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1977       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1978       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1979     }
1980     if (!pcbddc->ksp_R) { /* create object if not present */
1981       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1982       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1983       /* default */
1984       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1985       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1986       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1987       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1988       if (issbaij) {
1989         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1990       } else {
1991         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1992       }
1993       /* Allow user's customization */
1994       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1995       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1996     }
1997     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1998     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1999     if (!n_R) {
2000       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2001       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2002     }
2003     /* Reuse MUMPS solver if it is present */
2004     if (sub_schurs->reuse_mumps) {
2005       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2006 
2007       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2008     }
2009     /* Set Up KSP for Neumann problem of BDDC */
2010     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2011   }
2012   /* free Neumann problem's matrix */
2013   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2014 
2015   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2016   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2017     if (pcbddc->dbg_flag) {
2018       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2019       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2020       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2021     }
2022     if (dirichlet) { /* Dirichlet */
2023       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2024       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2025       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2026       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2027       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2028       /* need to be adapted? */
2029       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2030       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2031       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2032       /* print info */
2033       if (pcbddc->dbg_flag) {
2034         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2035         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2036       }
2037       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2038         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2039       }
2040     }
2041     if (neumann) { /* Neumann */
2042       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2043       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2044       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2045       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2046       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2047       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2048       /* need to be adapted? */
2049       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2050       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2051       /* print info */
2052       if (pcbddc->dbg_flag) {
2053         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2054         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2055       }
2056       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2057         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2058       }
2059     }
2060   }
2061   PetscFunctionReturn(0);
2062 }
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2066 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2067 {
2068   PetscErrorCode  ierr;
2069   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2070   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2071 
2072   PetscFunctionBegin;
2073   if (!sub_schurs->reuse_mumps) {
2074     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2075   }
2076   if (!pcbddc->switch_static) {
2077     if (applytranspose && pcbddc->local_auxmat1) {
2078       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2079       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2080     }
2081     if (!sub_schurs->reuse_mumps) {
2082       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2083       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2084     } else {
2085       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2086 
2087       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2088       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2089     }
2090   } else {
2091     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2092     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2093     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2094     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2095     if (applytranspose && pcbddc->local_auxmat1) {
2096       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2097       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2098       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2099       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2100     }
2101   }
2102   if (!sub_schurs->reuse_mumps) {
2103     if (applytranspose) {
2104       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2105     } else {
2106       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2107     }
2108 #if defined(PETSC_HAVE_MUMPS)
2109   } else {
2110     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2111 
2112     if (applytranspose) {
2113       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2114     } else {
2115       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2116     }
2117 #endif
2118   }
2119   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2120   if (!pcbddc->switch_static) {
2121     if (!sub_schurs->reuse_mumps) {
2122       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2123       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2124     } else {
2125       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2126 
2127       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2128       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2129     }
2130     if (!applytranspose && pcbddc->local_auxmat1) {
2131       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2132       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2133     }
2134   } else {
2135     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2136     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2137     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2138     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2139     if (!applytranspose && pcbddc->local_auxmat1) {
2140       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2141       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2142     }
2143     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2144     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2145     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2146     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2147   }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2152 #undef __FUNCT__
2153 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2154 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2155 {
2156   PetscErrorCode ierr;
2157   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2158   PC_IS*            pcis = (PC_IS*)  (pc->data);
2159   const PetscScalar zero = 0.0;
2160 
2161   PetscFunctionBegin;
2162   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2163   if (applytranspose) {
2164     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2165     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2166   } else {
2167     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2168     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2169   }
2170 
2171   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2172   if (pcbddc->benign_p0_lidx >= 0) {
2173     PetscScalar *array;
2174 
2175     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2176     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2177     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2178   }
2179 
2180   /* start communications from local primal nodes to rhs of coarse solver */
2181   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2182   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2184 
2185   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2186   /* TODO remove null space when doing multilevel */
2187   if (pcbddc->coarse_ksp) {
2188     Vec rhs,sol;
2189 
2190     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2191     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2192     if (applytranspose) {
2193       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2194     } else {
2195       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2196     }
2197   }
2198 
2199   /* Local solution on R nodes */
2200   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2201     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2202   }
2203 
2204   /* communications from coarse sol to local primal nodes */
2205   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2206   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2207 
2208   /* Sum contributions from two levels */
2209   if (applytranspose) {
2210     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2211     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2212   } else {
2213     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2214     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2215   }
2216   /* store p0 */
2217   if (pcbddc->benign_p0_lidx >= 0) {
2218     PetscScalar *array;
2219 
2220     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2221     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2222     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2223   }
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2229 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2230 {
2231   PetscErrorCode ierr;
2232   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2233   PetscScalar    *array;
2234   Vec            from,to;
2235 
2236   PetscFunctionBegin;
2237   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2238     from = pcbddc->coarse_vec;
2239     to = pcbddc->vec1_P;
2240     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2241       Vec tvec;
2242 
2243       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2244       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2245       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2246       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2247       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2248       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2249     }
2250   } else { /* from local to global -> put data in coarse right hand side */
2251     from = pcbddc->vec1_P;
2252     to = pcbddc->coarse_vec;
2253   }
2254   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2255   PetscFunctionReturn(0);
2256 }
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2260 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2261 {
2262   PetscErrorCode ierr;
2263   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2264   PetscScalar    *array;
2265   Vec            from,to;
2266 
2267   PetscFunctionBegin;
2268   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2269     from = pcbddc->coarse_vec;
2270     to = pcbddc->vec1_P;
2271   } else { /* from local to global -> put data in coarse right hand side */
2272     from = pcbddc->vec1_P;
2273     to = pcbddc->coarse_vec;
2274   }
2275   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2276   if (smode == SCATTER_FORWARD) {
2277     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2278       Vec tvec;
2279 
2280       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2281       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2282       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2283       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2284     }
2285   } else {
2286     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2287      ierr = VecResetArray(from);CHKERRQ(ierr);
2288     }
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 /* uncomment for testing purposes */
2294 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2295 #undef __FUNCT__
2296 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2297 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2298 {
2299   PetscErrorCode    ierr;
2300   PC_IS*            pcis = (PC_IS*)(pc->data);
2301   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2302   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2303   /* one and zero */
2304   PetscScalar       one=1.0,zero=0.0;
2305   /* space to store constraints and their local indices */
2306   PetscScalar       *constraints_data;
2307   PetscInt          *constraints_idxs,*constraints_idxs_B;
2308   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2309   PetscInt          *constraints_n;
2310   /* iterators */
2311   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2312   /* BLAS integers */
2313   PetscBLASInt      lwork,lierr;
2314   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2315   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2316   /* reuse */
2317   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2318   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2319   /* change of basis */
2320   PetscBool         qr_needed;
2321   PetscBT           change_basis,qr_needed_idx;
2322   /* auxiliary stuff */
2323   PetscInt          *nnz,*is_indices;
2324   PetscInt          ncc;
2325   /* some quantities */
2326   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2327   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2328 
2329   PetscFunctionBegin;
2330   /* Destroy Mat objects computed previously */
2331   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2332   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2333   /* save info on constraints from previous setup (if any) */
2334   olocal_primal_size = pcbddc->local_primal_size;
2335   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2336   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2337   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2338   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2339   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2340   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2341 
2342   /* print some info */
2343   if (pcbddc->dbg_flag) {
2344     IS       vertices;
2345     PetscInt nv,nedges,nfaces;
2346     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2347     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2348     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2349     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2350     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2351     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2352     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2353     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2354     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2355   }
2356 
2357   if (!pcbddc->adaptive_selection) {
2358     IS           ISForVertices,*ISForFaces,*ISForEdges;
2359     MatNullSpace nearnullsp;
2360     const Vec    *nearnullvecs;
2361     Vec          *localnearnullsp;
2362     PetscScalar  *array;
2363     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2364     PetscBool    nnsp_has_cnst;
2365     /* LAPACK working arrays for SVD or POD */
2366     PetscBool    skip_lapack,boolforchange;
2367     PetscScalar  *work;
2368     PetscReal    *singular_vals;
2369 #if defined(PETSC_USE_COMPLEX)
2370     PetscReal    *rwork;
2371 #endif
2372 #if defined(PETSC_MISSING_LAPACK_GESVD)
2373     PetscScalar  *temp_basis,*correlation_mat;
2374 #else
2375     PetscBLASInt dummy_int=1;
2376     PetscScalar  dummy_scalar=1.;
2377 #endif
2378 
2379     /* Get index sets for faces, edges and vertices from graph */
2380     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2381     /* free unneeded index sets */
2382     if (!pcbddc->use_vertices) {
2383       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2384     }
2385     if (!pcbddc->use_edges) {
2386       for (i=0;i<n_ISForEdges;i++) {
2387         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2388       }
2389       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2390       n_ISForEdges = 0;
2391     }
2392     if (!pcbddc->use_faces) {
2393       for (i=0;i<n_ISForFaces;i++) {
2394         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2395       }
2396       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2397       n_ISForFaces = 0;
2398     }
2399 
2400 #if defined(PETSC_USE_DEBUG)
2401     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2402        Also use_change_of_basis should be consistent among processors */
2403     if (pcbddc->NullSpace) {
2404       PetscBool tbool[2],gbool[2];
2405 
2406       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2407         pcbddc->use_change_of_basis = PETSC_TRUE;
2408         if (!ISForEdges) {
2409           pcbddc->use_change_on_faces = PETSC_TRUE;
2410         }
2411       }
2412       tbool[0] = pcbddc->use_change_of_basis;
2413       tbool[1] = pcbddc->use_change_on_faces;
2414       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2415       pcbddc->use_change_of_basis = gbool[0];
2416       pcbddc->use_change_on_faces = gbool[1];
2417     }
2418 #endif
2419 
2420     /* check if near null space is attached to global mat */
2421     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2422     if (nearnullsp) {
2423       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2424       /* remove any stored info */
2425       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2426       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2427       /* store information for BDDC solver reuse */
2428       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2429       pcbddc->onearnullspace = nearnullsp;
2430       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2431       for (i=0;i<nnsp_size;i++) {
2432         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2433       }
2434     } else { /* if near null space is not provided BDDC uses constants by default */
2435       nnsp_size = 0;
2436       nnsp_has_cnst = PETSC_TRUE;
2437     }
2438     /* get max number of constraints on a single cc */
2439     max_constraints = nnsp_size;
2440     if (nnsp_has_cnst) max_constraints++;
2441 
2442     /*
2443          Evaluate maximum storage size needed by the procedure
2444          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2445          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2446          There can be multiple constraints per connected component
2447                                                                                                                                                            */
2448     n_vertices = 0;
2449     if (ISForVertices) {
2450       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2451     }
2452     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2453     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2454 
2455     total_counts = n_ISForFaces+n_ISForEdges;
2456     total_counts *= max_constraints;
2457     total_counts += n_vertices;
2458     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2459 
2460     total_counts = 0;
2461     max_size_of_constraint = 0;
2462     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2463       IS used_is;
2464       if (i<n_ISForEdges) {
2465         used_is = ISForEdges[i];
2466       } else {
2467         used_is = ISForFaces[i-n_ISForEdges];
2468       }
2469       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2470       total_counts += j;
2471       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2472     }
2473     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2474 
2475     /* get local part of global near null space vectors */
2476     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2477     for (k=0;k<nnsp_size;k++) {
2478       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2479       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2480       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2481     }
2482 
2483     /* whether or not to skip lapack calls */
2484     skip_lapack = PETSC_TRUE;
2485     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2486 
2487     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2488     if (!skip_lapack) {
2489       PetscScalar temp_work;
2490 
2491 #if defined(PETSC_MISSING_LAPACK_GESVD)
2492       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2493       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2494       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2495       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2496 #if defined(PETSC_USE_COMPLEX)
2497       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2498 #endif
2499       /* now we evaluate the optimal workspace using query with lwork=-1 */
2500       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2501       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2502       lwork = -1;
2503       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2504 #if !defined(PETSC_USE_COMPLEX)
2505       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2506 #else
2507       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2508 #endif
2509       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2510       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2511 #else /* on missing GESVD */
2512       /* SVD */
2513       PetscInt max_n,min_n;
2514       max_n = max_size_of_constraint;
2515       min_n = max_constraints;
2516       if (max_size_of_constraint < max_constraints) {
2517         min_n = max_size_of_constraint;
2518         max_n = max_constraints;
2519       }
2520       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2521 #if defined(PETSC_USE_COMPLEX)
2522       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2523 #endif
2524       /* now we evaluate the optimal workspace using query with lwork=-1 */
2525       lwork = -1;
2526       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2527       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2528       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2529       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2530 #if !defined(PETSC_USE_COMPLEX)
2531       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2532 #else
2533       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2534 #endif
2535       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2536       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2537 #endif /* on missing GESVD */
2538       /* Allocate optimal workspace */
2539       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2540       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2541     }
2542     /* Now we can loop on constraining sets */
2543     total_counts = 0;
2544     constraints_idxs_ptr[0] = 0;
2545     constraints_data_ptr[0] = 0;
2546     /* vertices */
2547     if (n_vertices) {
2548       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2549       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2550       for (i=0;i<n_vertices;i++) {
2551         constraints_n[total_counts] = 1;
2552         constraints_data[total_counts] = 1.0;
2553         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2554         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2555         total_counts++;
2556       }
2557       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2558       n_vertices = total_counts;
2559     }
2560 
2561     /* edges and faces */
2562     total_counts_cc = total_counts;
2563     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2564       IS        used_is;
2565       PetscBool idxs_copied = PETSC_FALSE;
2566 
2567       if (ncc<n_ISForEdges) {
2568         used_is = ISForEdges[ncc];
2569         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2570       } else {
2571         used_is = ISForFaces[ncc-n_ISForEdges];
2572         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2573       }
2574       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2575 
2576       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2577       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2578       /* change of basis should not be performed on local periodic nodes */
2579       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2580       if (nnsp_has_cnst) {
2581         PetscScalar quad_value;
2582 
2583         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2584         idxs_copied = PETSC_TRUE;
2585 
2586         if (!pcbddc->use_nnsp_true) {
2587           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2588         } else {
2589           quad_value = 1.0;
2590         }
2591         for (j=0;j<size_of_constraint;j++) {
2592           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2593         }
2594         temp_constraints++;
2595         total_counts++;
2596       }
2597       for (k=0;k<nnsp_size;k++) {
2598         PetscReal real_value;
2599         PetscScalar *ptr_to_data;
2600 
2601         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2602         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2603         for (j=0;j<size_of_constraint;j++) {
2604           ptr_to_data[j] = array[is_indices[j]];
2605         }
2606         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2607         /* check if array is null on the connected component */
2608         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2609         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2610         if (real_value > 0.0) { /* keep indices and values */
2611           temp_constraints++;
2612           total_counts++;
2613           if (!idxs_copied) {
2614             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2615             idxs_copied = PETSC_TRUE;
2616           }
2617         }
2618       }
2619       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2620       valid_constraints = temp_constraints;
2621       if (!pcbddc->use_nnsp_true && temp_constraints) {
2622         if (temp_constraints == 1) { /* just normalize the constraint */
2623           PetscScalar norm,*ptr_to_data;
2624 
2625           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2626           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2627           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2628           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2629           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2630         } else { /* perform SVD */
2631           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2632           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2633 
2634 #if defined(PETSC_MISSING_LAPACK_GESVD)
2635           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2636              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2637              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2638                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2639                 from that computed using LAPACKgesvd
2640              -> This is due to a different computation of eigenvectors in LAPACKheev
2641              -> The quality of the POD-computed basis will be the same */
2642           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2643           /* Store upper triangular part of correlation matrix */
2644           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2645           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2646           for (j=0;j<temp_constraints;j++) {
2647             for (k=0;k<j+1;k++) {
2648               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2649             }
2650           }
2651           /* compute eigenvalues and eigenvectors of correlation matrix */
2652           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2653           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2654 #if !defined(PETSC_USE_COMPLEX)
2655           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2656 #else
2657           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2658 #endif
2659           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2660           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2661           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2662           j = 0;
2663           while (j < temp_constraints && singular_vals[j] < tol) j++;
2664           total_counts = total_counts-j;
2665           valid_constraints = temp_constraints-j;
2666           /* scale and copy POD basis into used quadrature memory */
2667           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2668           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2669           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2670           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2671           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2672           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2673           if (j<temp_constraints) {
2674             PetscInt ii;
2675             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2676             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2677             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2678             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2679             for (k=0;k<temp_constraints-j;k++) {
2680               for (ii=0;ii<size_of_constraint;ii++) {
2681                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2682               }
2683             }
2684           }
2685 #else  /* on missing GESVD */
2686           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2687           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2688           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2689           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2690 #if !defined(PETSC_USE_COMPLEX)
2691           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2692 #else
2693           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2694 #endif
2695           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2696           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2697           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2698           k = temp_constraints;
2699           if (k > size_of_constraint) k = size_of_constraint;
2700           j = 0;
2701           while (j < k && singular_vals[k-j-1] < tol) j++;
2702           valid_constraints = k-j;
2703           total_counts = total_counts-temp_constraints+valid_constraints;
2704 #endif /* on missing GESVD */
2705         }
2706       }
2707       /* update pointers information */
2708       if (valid_constraints) {
2709         constraints_n[total_counts_cc] = valid_constraints;
2710         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2711         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2712         /* set change_of_basis flag */
2713         if (boolforchange) {
2714           PetscBTSet(change_basis,total_counts_cc);
2715         }
2716         total_counts_cc++;
2717       }
2718     }
2719     /* free workspace */
2720     if (!skip_lapack) {
2721       ierr = PetscFree(work);CHKERRQ(ierr);
2722 #if defined(PETSC_USE_COMPLEX)
2723       ierr = PetscFree(rwork);CHKERRQ(ierr);
2724 #endif
2725       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2726 #if defined(PETSC_MISSING_LAPACK_GESVD)
2727       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2728       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2729 #endif
2730     }
2731     for (k=0;k<nnsp_size;k++) {
2732       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2733     }
2734     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2735     /* free index sets of faces, edges and vertices */
2736     for (i=0;i<n_ISForFaces;i++) {
2737       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2738     }
2739     if (n_ISForFaces) {
2740       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2741     }
2742     for (i=0;i<n_ISForEdges;i++) {
2743       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2744     }
2745     if (n_ISForEdges) {
2746       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2747     }
2748     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2749   } else {
2750     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2751 
2752     total_counts = 0;
2753     n_vertices = 0;
2754     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2755       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2756     }
2757     max_constraints = 0;
2758     total_counts_cc = 0;
2759     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2760       total_counts += pcbddc->adaptive_constraints_n[i];
2761       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2762       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2763     }
2764     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2765     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2766     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2767     constraints_data = pcbddc->adaptive_constraints_data;
2768     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2769     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2770     total_counts_cc = 0;
2771     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2772       if (pcbddc->adaptive_constraints_n[i]) {
2773         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2774       }
2775     }
2776 #if 0
2777     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2778     for (i=0;i<total_counts_cc;i++) {
2779       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2780       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2781       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2782         printf(" %d",constraints_idxs[j]);
2783       }
2784       printf("\n");
2785       printf("number of cc: %d\n",constraints_n[i]);
2786     }
2787     for (i=0;i<n_vertices;i++) {
2788       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2789     }
2790     for (i=0;i<sub_schurs->n_subs;i++) {
2791       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2792     }
2793 #endif
2794 
2795     max_size_of_constraint = 0;
2796     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2797     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2798     /* Change of basis */
2799     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2800     if (pcbddc->use_change_of_basis) {
2801       for (i=0;i<sub_schurs->n_subs;i++) {
2802         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2803           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2804         }
2805       }
2806     }
2807   }
2808   pcbddc->local_primal_size = total_counts;
2809   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2810   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2811 
2812   /* map constraints_idxs in boundary numbering */
2813   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2814   if (i != constraints_idxs_ptr[total_counts_cc]) {
2815     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2816   }
2817 
2818   /* Create constraint matrix */
2819   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2820   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2821   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2822 
2823   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2824   /* determine if a QR strategy is needed for change of basis */
2825   qr_needed = PETSC_FALSE;
2826   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2827   total_primal_vertices=0;
2828   pcbddc->local_primal_size_cc = 0;
2829   for (i=0;i<total_counts_cc;i++) {
2830     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2831     if (size_of_constraint == 1) {
2832       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2833       pcbddc->local_primal_size_cc += 1;
2834     } else if (PetscBTLookup(change_basis,i)) {
2835       for (k=0;k<constraints_n[i];k++) {
2836         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2837       }
2838       pcbddc->local_primal_size_cc += constraints_n[i];
2839       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2840         PetscBTSet(qr_needed_idx,i);
2841         qr_needed = PETSC_TRUE;
2842       }
2843     } else {
2844       pcbddc->local_primal_size_cc += 1;
2845     }
2846   }
2847   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2848   pcbddc->n_vertices = total_primal_vertices;
2849   /* permute indices in order to have a sorted set of vertices */
2850   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2851 
2852   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2853   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2854   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2855   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2856 
2857   /* nonzero structure of constraint matrix */
2858   /* and get reference dof for local constraints */
2859   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2860   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2861 
2862   j = total_primal_vertices;
2863   total_counts = total_primal_vertices;
2864   cum = total_primal_vertices;
2865   for (i=n_vertices;i<total_counts_cc;i++) {
2866     if (!PetscBTLookup(change_basis,i)) {
2867       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2868       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2869       cum++;
2870       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2871       for (k=0;k<constraints_n[i];k++) {
2872         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2873         nnz[j+k] = size_of_constraint;
2874       }
2875       j += constraints_n[i];
2876     }
2877   }
2878   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2879   ierr = PetscFree(nnz);CHKERRQ(ierr);
2880 
2881   /* set values in constraint matrix */
2882   for (i=0;i<total_primal_vertices;i++) {
2883     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2884   }
2885   total_counts = total_primal_vertices;
2886   for (i=n_vertices;i<total_counts_cc;i++) {
2887     if (!PetscBTLookup(change_basis,i)) {
2888       PetscInt *cols;
2889 
2890       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2891       cols = constraints_idxs+constraints_idxs_ptr[i];
2892       for (k=0;k<constraints_n[i];k++) {
2893         PetscInt    row = total_counts+k;
2894         PetscScalar *vals;
2895 
2896         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2897         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2898       }
2899       total_counts += constraints_n[i];
2900     }
2901   }
2902   /* assembling */
2903   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905 
2906   /*
2907   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2908   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2909   */
2910   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2911   if (pcbddc->use_change_of_basis) {
2912     /* dual and primal dofs on a single cc */
2913     PetscInt     dual_dofs,primal_dofs;
2914     /* working stuff for GEQRF */
2915     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2916     PetscBLASInt lqr_work;
2917     /* working stuff for UNGQR */
2918     PetscScalar  *gqr_work,lgqr_work_t;
2919     PetscBLASInt lgqr_work;
2920     /* working stuff for TRTRS */
2921     PetscScalar  *trs_rhs;
2922     PetscBLASInt Blas_NRHS;
2923     /* pointers for values insertion into change of basis matrix */
2924     PetscInt     *start_rows,*start_cols;
2925     PetscScalar  *start_vals;
2926     /* working stuff for values insertion */
2927     PetscBT      is_primal;
2928     PetscInt     *aux_primal_numbering_B;
2929     /* matrix sizes */
2930     PetscInt     global_size,local_size;
2931     /* temporary change of basis */
2932     Mat          localChangeOfBasisMatrix;
2933     /* extra space for debugging */
2934     PetscScalar  *dbg_work;
2935 
2936     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2937     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2938     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2939     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2940     /* nonzeros for local mat */
2941     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2942     for (i=0;i<pcis->n;i++) nnz[i]=1;
2943     for (i=n_vertices;i<total_counts_cc;i++) {
2944       if (PetscBTLookup(change_basis,i)) {
2945         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2946         if (PetscBTLookup(qr_needed_idx,i)) {
2947           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2948         } else {
2949           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2950           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2951         }
2952       }
2953     }
2954     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2955     ierr = PetscFree(nnz);CHKERRQ(ierr);
2956     /* Set initial identity in the matrix */
2957     for (i=0;i<pcis->n;i++) {
2958       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2959     }
2960 
2961     if (pcbddc->dbg_flag) {
2962       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2963       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2964     }
2965 
2966 
2967     /* Now we loop on the constraints which need a change of basis */
2968     /*
2969        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2970        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2971 
2972        Basic blocks of change of basis matrix T computed by
2973 
2974           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2975 
2976             | 1        0   ...        0         s_1/S |
2977             | 0        1   ...        0         s_2/S |
2978             |              ...                        |
2979             | 0        ...            1     s_{n-1}/S |
2980             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2981 
2982             with S = \sum_{i=1}^n s_i^2
2983             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2984                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2985 
2986           - QR decomposition of constraints otherwise
2987     */
2988     if (qr_needed) {
2989       /* space to store Q */
2990       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2991       /* first we issue queries for optimal work */
2992       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2993       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2994       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2995       lqr_work = -1;
2996       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2997       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2998       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2999       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3000       lgqr_work = -1;
3001       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3002       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3003       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3004       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3005       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3006       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3007       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3008       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3009       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3010       /* array to store scaling factors for reflectors */
3011       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3012       /* array to store rhs and solution of triangular solver */
3013       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3014       /* allocating workspace for check */
3015       if (pcbddc->dbg_flag) {
3016         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3017       }
3018     }
3019     /* array to store whether a node is primal or not */
3020     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3021     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3022     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3023     if (i != total_primal_vertices) {
3024       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3025     }
3026     for (i=0;i<total_primal_vertices;i++) {
3027       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3028     }
3029     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3030 
3031     /* loop on constraints and see whether or not they need a change of basis and compute it */
3032     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3033       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3034       if (PetscBTLookup(change_basis,total_counts)) {
3035         /* get constraint info */
3036         primal_dofs = constraints_n[total_counts];
3037         dual_dofs = size_of_constraint-primal_dofs;
3038 
3039         if (pcbddc->dbg_flag) {
3040           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3041         }
3042 
3043         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3044 
3045           /* copy quadrature constraints for change of basis check */
3046           if (pcbddc->dbg_flag) {
3047             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3048           }
3049           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3050           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3051 
3052           /* compute QR decomposition of constraints */
3053           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3054           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3055           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3056           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3057           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3058           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3059           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3060 
3061           /* explictly compute R^-T */
3062           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3063           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3064           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3065           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3066           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3067           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3068           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3069           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3070           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3071           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3072 
3073           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3075           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3076           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3077           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3078           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3079           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3080           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3081           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3082 
3083           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3084              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3085              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3086           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3087           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3088           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3089           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3090           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3091           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3092           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3093           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3094           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3095           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3096 
3097           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3098           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3099           /* insert cols for primal dofs */
3100           for (j=0;j<primal_dofs;j++) {
3101             start_vals = &qr_basis[j*size_of_constraint];
3102             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3103             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3104           }
3105           /* insert cols for dual dofs */
3106           for (j=0,k=0;j<dual_dofs;k++) {
3107             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3108               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3109               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3110               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3111               j++;
3112             }
3113           }
3114 
3115           /* check change of basis */
3116           if (pcbddc->dbg_flag) {
3117             PetscInt   ii,jj;
3118             PetscBool valid_qr=PETSC_TRUE;
3119             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3120             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3121             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3122             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3123             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3124             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3125             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3126             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3127             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3128             for (jj=0;jj<size_of_constraint;jj++) {
3129               for (ii=0;ii<primal_dofs;ii++) {
3130                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3131                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3132               }
3133             }
3134             if (!valid_qr) {
3135               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3136               for (jj=0;jj<size_of_constraint;jj++) {
3137                 for (ii=0;ii<primal_dofs;ii++) {
3138                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3139                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3140                   }
3141                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3142                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3143                   }
3144                 }
3145               }
3146             } else {
3147               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3148             }
3149           }
3150         } else { /* simple transformation block */
3151           PetscInt    row,col;
3152           PetscScalar val,norm;
3153 
3154           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3155           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3156           for (j=0;j<size_of_constraint;j++) {
3157             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3158             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3159             if (!PetscBTLookup(is_primal,row_B)) {
3160               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3161               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3162               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3163             } else {
3164               for (k=0;k<size_of_constraint;k++) {
3165                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3166                 if (row != col) {
3167                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3168                 } else {
3169                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3170                 }
3171                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3172               }
3173             }
3174           }
3175           if (pcbddc->dbg_flag) {
3176             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3177           }
3178         }
3179       } else {
3180         if (pcbddc->dbg_flag) {
3181           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3182         }
3183       }
3184     }
3185 
3186     /* free workspace */
3187     if (qr_needed) {
3188       if (pcbddc->dbg_flag) {
3189         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3190       }
3191       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3192       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3193       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3194       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3195       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3196     }
3197     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3198     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3199     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3200 
3201     /* assembling of global change of variable */
3202     {
3203       Mat      tmat;
3204       PetscInt bs;
3205 
3206       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3207       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3208       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3209       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3210       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3211       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3212       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3213       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3214       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3215       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3216       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3217       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3218       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3219       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3220       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3221       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3222       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3223       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3224     }
3225     /* check */
3226     if (pcbddc->dbg_flag) {
3227       PetscReal error;
3228       Vec       x,x_change;
3229 
3230       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3231       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3232       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3233       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3234       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3236       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3237       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3238       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3239       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3240       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3241       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3242       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3243       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3244       ierr = VecDestroy(&x);CHKERRQ(ierr);
3245       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3246     }
3247 
3248     /* adapt sub_schurs computed (if any) */
3249     if (pcbddc->use_deluxe_scaling) {
3250       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3251       if (sub_schurs->S_Ej_all) {
3252         Mat                    S_new,tmat;
3253         ISLocalToGlobalMapping NtoSall;
3254         IS                     is_all_N,is_V,is_V_Sall;
3255         const PetscScalar      *array;
3256         const PetscInt         *idxs_V,*idxs_all;
3257         PetscInt               i,n_V;
3258 
3259         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3260         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3261         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3262         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3263         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3264         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3265         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3266         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3267         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3268         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3269         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3270         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3271         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3272         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3273         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3274         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3275         for (i=0;i<n_V;i++) {
3276           PetscScalar val;
3277           PetscInt    idx;
3278 
3279           idx = idxs_V[i];
3280           val = array[idxs_all[idxs_V[i]]];
3281           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3282         }
3283         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3284         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285         sub_schurs->S_Ej_all = S_new;
3286         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3287         if (sub_schurs->sum_S_Ej_all) {
3288           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3289           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3290           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3291           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3292           sub_schurs->sum_S_Ej_all = S_new;
3293           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3294         }
3295         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3296         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3297         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3298         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3299         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3300       }
3301     }
3302     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3303   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3304     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3305     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3306   }
3307 
3308   /* set up change of basis context */
3309   if (pcbddc->ChangeOfBasisMatrix) {
3310     PCBDDCChange_ctx change_ctx;
3311 
3312     if (!pcbddc->new_global_mat) {
3313       PetscInt global_size,local_size;
3314 
3315       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3316       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3317       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3318       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3319       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3320       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3321       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3322       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3323       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3324     } else {
3325       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3326       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3327       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3328     }
3329     if (!pcbddc->user_ChangeOfBasisMatrix) {
3330       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3331       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3332     } else {
3333       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3334       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3335     }
3336     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3337     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3338     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3339     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3340   }
3341 
3342   /* add pressure dof to set of primal nodes for numbering purposes */
3343   if (pcbddc->benign_p0_lidx >= 0) {
3344     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx;
3345     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx;
3346     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3347     pcbddc->local_primal_size_cc++;
3348     pcbddc->local_primal_size++;
3349   }
3350 
3351   /* check if a new primal space has been introduced (also take into account benign trick) */
3352   pcbddc->new_primal_space_local = PETSC_TRUE;
3353   if (olocal_primal_size == pcbddc->local_primal_size) {
3354     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3355     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3356     if (!pcbddc->new_primal_space_local) {
3357       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3358       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3359     }
3360   }
3361   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3362   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3363   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3364 
3365   /* flush dbg viewer */
3366   if (pcbddc->dbg_flag) {
3367     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3368   }
3369 
3370   /* free workspace */
3371   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3372   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3373   if (!pcbddc->adaptive_selection) {
3374     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3375     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3376   } else {
3377     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3378                       pcbddc->adaptive_constraints_idxs_ptr,
3379                       pcbddc->adaptive_constraints_data_ptr,
3380                       pcbddc->adaptive_constraints_idxs,
3381                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3382     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3383     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3384   }
3385   PetscFunctionReturn(0);
3386 }
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3390 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3391 {
3392   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3393   PC_IS       *pcis = (PC_IS*)pc->data;
3394   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3395   PetscInt    ierr,i,vertex_size,N;
3396   PetscViewer viewer=pcbddc->dbg_viewer;
3397 
3398   PetscFunctionBegin;
3399   /* Reset previously computed graph */
3400   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3401   /* Init local Graph struct */
3402   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3403   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3404 
3405   /* Check validity of the csr graph passed in by the user */
3406   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3407     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3408   }
3409 
3410   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3411   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3412     PetscInt  *xadj,*adjncy;
3413     PetscInt  nvtxs;
3414     PetscBool flg_row=PETSC_FALSE;
3415 
3416     if (pcbddc->use_local_adj) {
3417 
3418       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3419       if (flg_row) {
3420         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3421         pcbddc->computed_rowadj = PETSC_TRUE;
3422       }
3423       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3424     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3425       IS                     is_dummy;
3426       ISLocalToGlobalMapping l2gmap_dummy;
3427       PetscInt               j,sum;
3428       PetscInt               *cxadj,*cadjncy;
3429       const PetscInt         *idxs;
3430       PCBDDCGraph            graph;
3431       PetscBT                is_on_boundary;
3432 
3433       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3434       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3435       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3436       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3437       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3438       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3439       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3440       if (flg_row) {
3441         graph->xadj = xadj;
3442         graph->adjncy = adjncy;
3443       }
3444       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3445       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3446       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3447 
3448       if (pcbddc->dbg_flag) {
3449         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3450         for (i=0;i<graph->ncc;i++) {
3451           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3452         }
3453       }
3454 
3455       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3456       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3457       for (i=0;i<pcis->n_B;i++) {
3458         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3459       }
3460       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3461 
3462       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3463       sum = 0;
3464       for (i=0;i<graph->ncc;i++) {
3465         PetscInt sizecc = 0;
3466         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3467           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3468             sizecc++;
3469           }
3470         }
3471         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3472           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3473             cxadj[graph->queue[j]] = sizecc;
3474           }
3475         }
3476         sum += sizecc*sizecc;
3477       }
3478       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3479       sum = 0;
3480       for (i=0;i<pcis->n;i++) {
3481         PetscInt temp = cxadj[i];
3482         cxadj[i] = sum;
3483         sum += temp;
3484       }
3485       cxadj[pcis->n] = sum;
3486       for (i=0;i<graph->ncc;i++) {
3487         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3488           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3489             PetscInt k,sizecc = 0;
3490             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3491               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3492                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3493                 sizecc++;
3494               }
3495             }
3496           }
3497         }
3498       }
3499       if (sum) {
3500         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3501       } else {
3502         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3503         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3504       }
3505       graph->xadj = 0;
3506       graph->adjncy = 0;
3507       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3508       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3509     }
3510   }
3511   if (pcbddc->dbg_flag) {
3512     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3513   }
3514 
3515   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3516   vertex_size = 1;
3517   if (pcbddc->user_provided_isfordofs) {
3518     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3519       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3520       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3521         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3522         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3523       }
3524       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3525       pcbddc->n_ISForDofs = 0;
3526       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3527     }
3528     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3529     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3530   } else {
3531     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3532       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3533       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3534       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3535         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3536       }
3537     }
3538   }
3539 
3540   /* Setup of Graph */
3541   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3542     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3543   }
3544   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3545     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3546   }
3547   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3548 
3549   /* Graph's connected components analysis */
3550   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3551 
3552   /* print some info to stdout */
3553   if (pcbddc->dbg_flag) {
3554     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3555   }
3556 
3557   /* mark topography has done */
3558   pcbddc->recompute_topography = PETSC_FALSE;
3559   PetscFunctionReturn(0);
3560 }
3561 
3562 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3563 #undef __FUNCT__
3564 #define __FUNCT__ "PCBDDCSubsetNumbering"
3565 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3566 {
3567   PetscSF        sf;
3568   PetscLayout    map;
3569   const PetscInt *idxs;
3570   PetscInt       *leaf_data,*root_data,*gidxs;
3571   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3572   PetscInt       n_n,nlocals,start,first_index;
3573   PetscMPIInt    commsize;
3574   PetscBool      first_found;
3575   PetscErrorCode ierr;
3576 
3577   PetscFunctionBegin;
3578   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3579   if (subset_mult) {
3580     PetscCheckSameComm(subset,1,subset_mult,2);
3581     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3582     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3583   }
3584   /* create workspace layout for computing global indices of subset */
3585   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3586   lbounds[0] = lbounds[1] = 0;
3587   for (i=0;i<n;i++) {
3588     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3589     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3590   }
3591   lbounds[0] = -lbounds[0];
3592   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3593   gbounds[0] = -gbounds[0];
3594   N = gbounds[1] - gbounds[0] + 1;
3595   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3596   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3597   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3598   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3599   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3600 
3601   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3602   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3603   if (subset_mult) {
3604     const PetscInt* idxs_mult;
3605 
3606     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3607     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3608     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3609   } else {
3610     for (i=0;i<n;i++) leaf_data[i] = 1;
3611   }
3612   /* local size of new subset */
3613   n_n = 0;
3614   for (i=0;i<n;i++) n_n += leaf_data[i];
3615 
3616   /* global indexes in layout */
3617   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3618   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3619   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3620   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3621   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3622   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3623 
3624   /* reduce from leaves to roots */
3625   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3626   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3627   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3628 
3629   /* count indexes in local part of layout */
3630   nlocals = 0;
3631   first_index = -1;
3632   first_found = PETSC_FALSE;
3633   for (i=0;i<Nl;i++) {
3634     if (!first_found && root_data[i]) {
3635       first_found = PETSC_TRUE;
3636       first_index = i;
3637     }
3638     nlocals += root_data[i];
3639   }
3640 
3641   /* cumulative of number of indexes and size of subset without holes */
3642 #if defined(PETSC_HAVE_MPI_EXSCAN)
3643   start = 0;
3644   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3645 #else
3646   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3647   start = start-nlocals;
3648 #endif
3649 
3650   if (N_n) { /* compute total size of new subset if requested */
3651     *N_n = start + nlocals;
3652     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3653     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3654   }
3655 
3656   /* adapt root data with cumulative */
3657   if (first_found) {
3658     PetscInt old_index;
3659 
3660     root_data[first_index] += start;
3661     old_index = first_index;
3662     for (i=first_index+1;i<Nl;i++) {
3663       if (root_data[i]) {
3664         root_data[i] += root_data[old_index];
3665         old_index = i;
3666       }
3667     }
3668   }
3669 
3670   /* from roots to leaves */
3671   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3672   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3673   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3674 
3675   /* create new IS with global indexes without holes */
3676   if (subset_mult) {
3677     const PetscInt* idxs_mult;
3678     PetscInt        cum;
3679 
3680     cum = 0;
3681     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3682     for (i=0;i<n;i++) {
3683       PetscInt j;
3684       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3685     }
3686     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3687   } else {
3688     for (i=0;i<n;i++) {
3689       gidxs[i] = leaf_data[i]-1;
3690     }
3691   }
3692   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3693   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 #undef __FUNCT__
3698 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3699 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3700 {
3701   PetscInt       i,j;
3702   PetscScalar    *alphas;
3703   PetscErrorCode ierr;
3704 
3705   PetscFunctionBegin;
3706   /* this implements stabilized Gram-Schmidt */
3707   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3708   for (i=0;i<n;i++) {
3709     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3710     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3711     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3712   }
3713   ierr = PetscFree(alphas);CHKERRQ(ierr);
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 #undef __FUNCT__
3718 #define __FUNCT__ "MatISGetSubassemblingPattern"
3719 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3720 {
3721   IS             ranks_send_to;
3722   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3723   PetscMPIInt    size,rank,color;
3724   PetscInt       *xadj,*adjncy;
3725   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3726   PetscInt       i,local_size,threshold=0;
3727   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3728   PetscSubcomm   subcomm;
3729   PetscErrorCode ierr;
3730 
3731   PetscFunctionBegin;
3732   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3733   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3734   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3735 
3736   /* Get info on mapping */
3737   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3738   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3739 
3740   /* build local CSR graph of subdomains' connectivity */
3741   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3742   xadj[0] = 0;
3743   xadj[1] = PetscMax(n_neighs-1,0);
3744   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3745   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3746 
3747   if (threshold) {
3748     PetscInt xadj_count = 0;
3749     for (i=1;i<n_neighs;i++) {
3750       if (n_shared[i] > threshold) {
3751         adjncy[xadj_count] = neighs[i];
3752         adjncy_wgt[xadj_count] = n_shared[i];
3753         xadj_count++;
3754       }
3755     }
3756     xadj[1] = xadj_count;
3757   } else {
3758     if (xadj[1]) {
3759       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3760       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3761     }
3762   }
3763   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3764   if (use_square) {
3765     for (i=0;i<xadj[1];i++) {
3766       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3767     }
3768   }
3769   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3770 
3771   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3772 
3773   /*
3774     Restrict work on active processes only.
3775   */
3776   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3777   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3778   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3779   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3780   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3781   if (color) {
3782     ierr = PetscFree(xadj);CHKERRQ(ierr);
3783     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3784     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3785   } else {
3786     Mat             subdomain_adj;
3787     IS              new_ranks,new_ranks_contig;
3788     MatPartitioning partitioner;
3789     PetscInt        prank,rstart=0,rend=0;
3790     PetscInt        *is_indices,*oldranks;
3791     PetscBool       aggregate;
3792 
3793     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3794     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3795     prank = rank;
3796     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3797     /*
3798     for (i=0;i<size;i++) {
3799       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3800     }
3801     */
3802     for (i=0;i<xadj[1];i++) {
3803       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3804     }
3805     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3806     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3807     if (aggregate) {
3808       PetscInt    lrows,row,ncols,*cols;
3809       PetscMPIInt nrank;
3810       PetscScalar *vals;
3811 
3812       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3813       lrows = 0;
3814       if (nrank<redprocs) {
3815         lrows = size/redprocs;
3816         if (nrank<size%redprocs) lrows++;
3817       }
3818       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3819       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3820       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3821       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3822       row = nrank;
3823       ncols = xadj[1]-xadj[0];
3824       cols = adjncy;
3825       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3826       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3827       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3828       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830       ierr = PetscFree(xadj);CHKERRQ(ierr);
3831       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3832       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3833       ierr = PetscFree(vals);CHKERRQ(ierr);
3834     } else {
3835       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3836     }
3837     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3838 
3839     /* Partition */
3840     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3841     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3842     if (use_vwgt) {
3843       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3844       v_wgt[0] = local_size;
3845       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3846     }
3847     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3848     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3849     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3850     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3851     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3852 
3853     /* renumber new_ranks to avoid "holes" in new set of processors */
3854     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3855     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3856     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3857     if (!redprocs) {
3858       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3859     } else {
3860       PetscInt    idxs[1];
3861       PetscMPIInt tag;
3862       MPI_Request *reqs;
3863 
3864       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3865       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3866       for (i=rstart;i<rend;i++) {
3867         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3868       }
3869       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3870       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3871       ierr = PetscFree(reqs);CHKERRQ(ierr);
3872       ranks_send_to_idx[0] = oldranks[idxs[0]];
3873     }
3874     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3875     /* clean up */
3876     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3877     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3878     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3879     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3880   }
3881   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3882 
3883   /* assemble parallel IS for sends */
3884   i = 1;
3885   if (color) i=0;
3886   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3887   /* get back IS */
3888   *is_sends = ranks_send_to;
3889   PetscFunctionReturn(0);
3890 }
3891 
3892 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3893 
3894 #undef __FUNCT__
3895 #define __FUNCT__ "MatISSubassemble"
3896 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3897 {
3898   Mat                    local_mat;
3899   IS                     is_sends_internal;
3900   PetscInt               rows,cols,new_local_rows;
3901   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3902   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3903   ISLocalToGlobalMapping l2gmap;
3904   PetscInt*              l2gmap_indices;
3905   const PetscInt*        is_indices;
3906   MatType                new_local_type;
3907   /* buffers */
3908   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3909   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3910   PetscInt               *recv_buffer_idxs_local;
3911   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3912   /* MPI */
3913   MPI_Comm               comm,comm_n;
3914   PetscSubcomm           subcomm;
3915   PetscMPIInt            n_sends,n_recvs,commsize;
3916   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3917   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3918   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3919   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3920   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3921   PetscErrorCode         ierr;
3922 
3923   PetscFunctionBegin;
3924   /* TODO: add missing checks */
3925   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3926   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3927   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3928   PetscValidLogicalCollectiveInt(mat,nis,7);
3929   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3930   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3931   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3932   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3933   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3934   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3935   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3936   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3937     PetscInt mrows,mcols,mnrows,mncols;
3938     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3939     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3940     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3941     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3942     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3943     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3944   }
3945   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3946   PetscValidLogicalCollectiveInt(mat,bs,0);
3947   /* prepare IS for sending if not provided */
3948   if (!is_sends) {
3949     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3950     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3951   } else {
3952     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3953     is_sends_internal = is_sends;
3954   }
3955 
3956   /* get comm */
3957   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3958 
3959   /* compute number of sends */
3960   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3961   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3962 
3963   /* compute number of receives */
3964   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3965   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3966   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3967   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3968   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3969   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3970   ierr = PetscFree(iflags);CHKERRQ(ierr);
3971 
3972   /* restrict comm if requested */
3973   subcomm = 0;
3974   destroy_mat = PETSC_FALSE;
3975   if (restrict_comm) {
3976     PetscMPIInt color,subcommsize;
3977 
3978     color = 0;
3979     if (restrict_full) {
3980       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3981     } else {
3982       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3983     }
3984     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3985     subcommsize = commsize - subcommsize;
3986     /* check if reuse has been requested */
3987     if (reuse == MAT_REUSE_MATRIX) {
3988       if (*mat_n) {
3989         PetscMPIInt subcommsize2;
3990         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3991         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3992         comm_n = PetscObjectComm((PetscObject)*mat_n);
3993       } else {
3994         comm_n = PETSC_COMM_SELF;
3995       }
3996     } else { /* MAT_INITIAL_MATRIX */
3997       PetscMPIInt rank;
3998 
3999       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4000       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4001       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4002       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4003       comm_n = PetscSubcommChild(subcomm);
4004     }
4005     /* flag to destroy *mat_n if not significative */
4006     if (color) destroy_mat = PETSC_TRUE;
4007   } else {
4008     comm_n = comm;
4009   }
4010 
4011   /* prepare send/receive buffers */
4012   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4013   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4014   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4015   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4016   if (nis) {
4017     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4018   }
4019 
4020   /* Get data from local matrices */
4021   if (!isdense) {
4022     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4023     /* TODO: See below some guidelines on how to prepare the local buffers */
4024     /*
4025        send_buffer_vals should contain the raw values of the local matrix
4026        send_buffer_idxs should contain:
4027        - MatType_PRIVATE type
4028        - PetscInt        size_of_l2gmap
4029        - PetscInt        global_row_indices[size_of_l2gmap]
4030        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4031     */
4032   } else {
4033     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4034     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4035     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4036     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4037     send_buffer_idxs[1] = i;
4038     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4039     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4040     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4041     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4042     for (i=0;i<n_sends;i++) {
4043       ilengths_vals[is_indices[i]] = len*len;
4044       ilengths_idxs[is_indices[i]] = len+2;
4045     }
4046   }
4047   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4048   /* additional is (if any) */
4049   if (nis) {
4050     PetscMPIInt psum;
4051     PetscInt j;
4052     for (j=0,psum=0;j<nis;j++) {
4053       PetscInt plen;
4054       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4055       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4056       psum += len+1; /* indices + lenght */
4057     }
4058     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4059     for (j=0,psum=0;j<nis;j++) {
4060       PetscInt plen;
4061       const PetscInt *is_array_idxs;
4062       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4063       send_buffer_idxs_is[psum] = plen;
4064       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4065       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4066       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4067       psum += plen+1; /* indices + lenght */
4068     }
4069     for (i=0;i<n_sends;i++) {
4070       ilengths_idxs_is[is_indices[i]] = psum;
4071     }
4072     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4073   }
4074 
4075   buf_size_idxs = 0;
4076   buf_size_vals = 0;
4077   buf_size_idxs_is = 0;
4078   for (i=0;i<n_recvs;i++) {
4079     buf_size_idxs += (PetscInt)olengths_idxs[i];
4080     buf_size_vals += (PetscInt)olengths_vals[i];
4081     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4082   }
4083   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4084   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4085   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4086 
4087   /* get new tags for clean communications */
4088   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4089   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4090   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4091 
4092   /* allocate for requests */
4093   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4094   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4095   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4096   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4097   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4098   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4099 
4100   /* communications */
4101   ptr_idxs = recv_buffer_idxs;
4102   ptr_vals = recv_buffer_vals;
4103   ptr_idxs_is = recv_buffer_idxs_is;
4104   for (i=0;i<n_recvs;i++) {
4105     source_dest = onodes[i];
4106     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4107     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4108     ptr_idxs += olengths_idxs[i];
4109     ptr_vals += olengths_vals[i];
4110     if (nis) {
4111       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4112       ptr_idxs_is += olengths_idxs_is[i];
4113     }
4114   }
4115   for (i=0;i<n_sends;i++) {
4116     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4117     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4118     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4119     if (nis) {
4120       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4121     }
4122   }
4123   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4124   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4125 
4126   /* assemble new l2g map */
4127   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4128   ptr_idxs = recv_buffer_idxs;
4129   new_local_rows = 0;
4130   for (i=0;i<n_recvs;i++) {
4131     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4132     ptr_idxs += olengths_idxs[i];
4133   }
4134   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4135   ptr_idxs = recv_buffer_idxs;
4136   new_local_rows = 0;
4137   for (i=0;i<n_recvs;i++) {
4138     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4139     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4140     ptr_idxs += olengths_idxs[i];
4141   }
4142   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4143   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4144   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4145 
4146   /* infer new local matrix type from received local matrices type */
4147   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4148   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4149   if (n_recvs) {
4150     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4151     ptr_idxs = recv_buffer_idxs;
4152     for (i=0;i<n_recvs;i++) {
4153       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4154         new_local_type_private = MATAIJ_PRIVATE;
4155         break;
4156       }
4157       ptr_idxs += olengths_idxs[i];
4158     }
4159     switch (new_local_type_private) {
4160       case MATDENSE_PRIVATE:
4161         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4162           new_local_type = MATSEQAIJ;
4163           bs = 1;
4164         } else { /* if I receive only 1 dense matrix */
4165           new_local_type = MATSEQDENSE;
4166           bs = 1;
4167         }
4168         break;
4169       case MATAIJ_PRIVATE:
4170         new_local_type = MATSEQAIJ;
4171         bs = 1;
4172         break;
4173       case MATBAIJ_PRIVATE:
4174         new_local_type = MATSEQBAIJ;
4175         break;
4176       case MATSBAIJ_PRIVATE:
4177         new_local_type = MATSEQSBAIJ;
4178         break;
4179       default:
4180         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4181         break;
4182     }
4183   } else { /* by default, new_local_type is seqdense */
4184     new_local_type = MATSEQDENSE;
4185     bs = 1;
4186   }
4187 
4188   /* create MATIS object if needed */
4189   if (reuse == MAT_INITIAL_MATRIX) {
4190     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4191     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4192   } else {
4193     /* it also destroys the local matrices */
4194     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4195   }
4196   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4197   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4198 
4199   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4200 
4201   /* Global to local map of received indices */
4202   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4203   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4204   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4205 
4206   /* restore attributes -> type of incoming data and its size */
4207   buf_size_idxs = 0;
4208   for (i=0;i<n_recvs;i++) {
4209     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4210     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4211     buf_size_idxs += (PetscInt)olengths_idxs[i];
4212   }
4213   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4214 
4215   /* set preallocation */
4216   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4217   if (!newisdense) {
4218     PetscInt *new_local_nnz=0;
4219 
4220     ptr_vals = recv_buffer_vals;
4221     ptr_idxs = recv_buffer_idxs_local;
4222     if (n_recvs) {
4223       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4224     }
4225     for (i=0;i<n_recvs;i++) {
4226       PetscInt j;
4227       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4228         for (j=0;j<*(ptr_idxs+1);j++) {
4229           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4230         }
4231       } else {
4232         /* TODO */
4233       }
4234       ptr_idxs += olengths_idxs[i];
4235     }
4236     if (new_local_nnz) {
4237       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4238       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4239       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4240       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4241       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4242       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4243     } else {
4244       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4245     }
4246     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4247   } else {
4248     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4249   }
4250 
4251   /* set values */
4252   ptr_vals = recv_buffer_vals;
4253   ptr_idxs = recv_buffer_idxs_local;
4254   for (i=0;i<n_recvs;i++) {
4255     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4256       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4257       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4258       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4259       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4260       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4261     } else {
4262       /* TODO */
4263     }
4264     ptr_idxs += olengths_idxs[i];
4265     ptr_vals += olengths_vals[i];
4266   }
4267   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4268   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4269   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4270   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4271   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4272   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4273 
4274 #if 0
4275   if (!restrict_comm) { /* check */
4276     Vec       lvec,rvec;
4277     PetscReal infty_error;
4278 
4279     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4280     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4281     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4282     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4283     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4284     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4285     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4286     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4287     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4288   }
4289 #endif
4290 
4291   /* assemble new additional is (if any) */
4292   if (nis) {
4293     PetscInt **temp_idxs,*count_is,j,psum;
4294 
4295     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4296     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4297     ptr_idxs = recv_buffer_idxs_is;
4298     psum = 0;
4299     for (i=0;i<n_recvs;i++) {
4300       for (j=0;j<nis;j++) {
4301         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4302         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4303         psum += plen;
4304         ptr_idxs += plen+1; /* shift pointer to received data */
4305       }
4306     }
4307     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4308     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4309     for (i=1;i<nis;i++) {
4310       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4311     }
4312     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4313     ptr_idxs = recv_buffer_idxs_is;
4314     for (i=0;i<n_recvs;i++) {
4315       for (j=0;j<nis;j++) {
4316         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4317         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4318         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4319         ptr_idxs += plen+1; /* shift pointer to received data */
4320       }
4321     }
4322     for (i=0;i<nis;i++) {
4323       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4324       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4325       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4326     }
4327     ierr = PetscFree(count_is);CHKERRQ(ierr);
4328     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4329     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4330   }
4331   /* free workspace */
4332   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4333   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4334   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4335   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4336   if (isdense) {
4337     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4338     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4339   } else {
4340     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4341   }
4342   if (nis) {
4343     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4344     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4345   }
4346   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4347   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4348   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4349   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4350   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4351   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4352   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4353   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4354   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4355   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4356   ierr = PetscFree(onodes);CHKERRQ(ierr);
4357   if (nis) {
4358     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4359     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4360     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4361   }
4362   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4363   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4364     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4365     for (i=0;i<nis;i++) {
4366       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4367     }
4368     *mat_n = NULL;
4369   }
4370   PetscFunctionReturn(0);
4371 }
4372 
4373 /* temporary hack into ksp private data structure */
4374 #include <petsc/private/kspimpl.h>
4375 
4376 #undef __FUNCT__
4377 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4378 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4379 {
4380   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4381   PC_IS                  *pcis = (PC_IS*)pc->data;
4382   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4383   MatNullSpace           CoarseNullSpace=NULL;
4384   ISLocalToGlobalMapping coarse_islg;
4385   IS                     coarse_is,*isarray;
4386   PetscInt               i,im_active=-1,active_procs=-1;
4387   PetscInt               nis,nisdofs,nisneu;
4388   PC                     pc_temp;
4389   PCType                 coarse_pc_type;
4390   KSPType                coarse_ksp_type;
4391   PetscBool              multilevel_requested,multilevel_allowed;
4392   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4393   Mat                    t_coarse_mat_is;
4394   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4395   PetscMPIInt            all_procs;
4396   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4397   PetscBool              compute_vecs = PETSC_FALSE;
4398   PetscScalar            *array;
4399   PetscErrorCode         ierr;
4400 
4401   PetscFunctionBegin;
4402   /* Assign global numbering to coarse dofs */
4403   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4404     PetscInt ocoarse_size;
4405     compute_vecs = PETSC_TRUE;
4406     ocoarse_size = pcbddc->coarse_size;
4407     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4408     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4409     /* see if we can avoid some work */
4410     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4411       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4412       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4413         PC        pc;
4414         PetscBool isbddc;
4415 
4416         /* temporary workaround since PCBDDC does not have a reset method so far */
4417         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4418         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4419         if (isbddc) {
4420           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4421         }
4422         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4423         coarse_reuse = PETSC_FALSE;
4424       } else { /* we can safely reuse already computed coarse matrix */
4425         coarse_reuse = PETSC_TRUE;
4426       }
4427     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4428       coarse_reuse = PETSC_FALSE;
4429     }
4430     /* reset any subassembling information */
4431     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4432     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4433   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4434     coarse_reuse = PETSC_TRUE;
4435   }
4436 
4437   /* count "active" (i.e. with positive local size) and "void" processes */
4438   im_active = !!(pcis->n);
4439   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4440   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4441   void_procs = all_procs-active_procs;
4442   csin_type_simple = PETSC_TRUE;
4443   redist = PETSC_FALSE;
4444   if (pcbddc->current_level && void_procs) {
4445     csin_ml = PETSC_TRUE;
4446     ncoarse_ml = void_procs;
4447     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4448     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4449       csin_ds = PETSC_TRUE;
4450       ncoarse_ds = pcbddc->redistribute_coarse;
4451       redist = PETSC_TRUE;
4452     } else {
4453       csin_ds = PETSC_TRUE;
4454       ncoarse_ds = active_procs;
4455       redist = PETSC_TRUE;
4456     }
4457   } else {
4458     csin_ml = PETSC_FALSE;
4459     ncoarse_ml = all_procs;
4460     if (void_procs) {
4461       csin_ds = PETSC_TRUE;
4462       ncoarse_ds = void_procs;
4463       csin_type_simple = PETSC_FALSE;
4464     } else {
4465       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4466         csin_ds = PETSC_TRUE;
4467         ncoarse_ds = pcbddc->redistribute_coarse;
4468         redist = PETSC_TRUE;
4469       } else {
4470         csin_ds = PETSC_FALSE;
4471         ncoarse_ds = all_procs;
4472       }
4473     }
4474   }
4475 
4476   /*
4477     test if we can go multilevel: three conditions must be satisfied:
4478     - we have not exceeded the number of levels requested
4479     - we can actually subassemble the active processes
4480     - we can find a suitable number of MPI processes where we can place the subassembled problem
4481   */
4482   multilevel_allowed = PETSC_FALSE;
4483   multilevel_requested = PETSC_FALSE;
4484   if (pcbddc->current_level < pcbddc->max_levels) {
4485     multilevel_requested = PETSC_TRUE;
4486     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4487       multilevel_allowed = PETSC_FALSE;
4488     } else {
4489       multilevel_allowed = PETSC_TRUE;
4490     }
4491   }
4492   /* determine number of process partecipating to coarse solver */
4493   if (multilevel_allowed) {
4494     ncoarse = ncoarse_ml;
4495     csin = csin_ml;
4496     redist = PETSC_FALSE;
4497   } else {
4498     ncoarse = ncoarse_ds;
4499     csin = csin_ds;
4500   }
4501 
4502   /* creates temporary l2gmap and IS for coarse indexes */
4503   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4504   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4505 
4506   /* creates temporary MATIS object for coarse matrix */
4507   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4508   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4509   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4510   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4511 #if 0
4512   {
4513     PetscViewer viewer;
4514     char filename[256];
4515     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4516     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4517     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4518     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4519     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4520   }
4521 #endif
4522   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4523   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4524   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4525   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4526   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4527 
4528   /* compute dofs splitting and neumann boundaries for coarse dofs */
4529   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4530     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4531     const PetscInt         *idxs;
4532     ISLocalToGlobalMapping tmap;
4533 
4534     /* create map between primal indices (in local representative ordering) and local primal numbering */
4535     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4536     /* allocate space for temporary storage */
4537     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4538     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4539     /* allocate for IS array */
4540     nisdofs = pcbddc->n_ISForDofsLocal;
4541     nisneu = !!pcbddc->NeumannBoundariesLocal;
4542     nis = nisdofs + nisneu;
4543     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4544     /* dofs splitting */
4545     for (i=0;i<nisdofs;i++) {
4546       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4547       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4548       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4549       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4550       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4551       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4552       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4553       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4554     }
4555     /* neumann boundaries */
4556     if (pcbddc->NeumannBoundariesLocal) {
4557       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4558       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4559       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4560       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4561       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4562       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4563       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4564       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4565     }
4566     /* free memory */
4567     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4568     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4569     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4570   } else {
4571     nis = 0;
4572     nisdofs = 0;
4573     nisneu = 0;
4574     isarray = NULL;
4575   }
4576   /* destroy no longer needed map */
4577   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4578 
4579   /* restrict on coarse candidates (if needed) */
4580   coarse_mat_is = NULL;
4581   if (csin) {
4582     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4583       if (redist) {
4584         PetscMPIInt rank;
4585         PetscInt    spc,n_spc_p1,dest[1],destsize;
4586 
4587         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4588         spc = active_procs/ncoarse;
4589         n_spc_p1 = active_procs%ncoarse;
4590         if (im_active) {
4591           destsize = 1;
4592           if (rank > n_spc_p1*(spc+1)-1) {
4593             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4594           } else {
4595             dest[0] = rank/(spc+1);
4596           }
4597         } else {
4598           destsize = 0;
4599         }
4600         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4601       } else if (csin_type_simple) {
4602         PetscMPIInt rank;
4603         PetscInt    issize,isidx;
4604 
4605         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4606         if (im_active) {
4607           issize = 1;
4608           isidx = (PetscInt)rank;
4609         } else {
4610           issize = 0;
4611           isidx = -1;
4612         }
4613         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4614       } else { /* get a suitable subassembling pattern from MATIS code */
4615         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4616       }
4617 
4618       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4619       if (!redist || ncoarse <= void_procs) {
4620         PetscInt ncoarse_cand,tissize,*nisindices;
4621         PetscInt *coarse_candidates;
4622         const PetscInt* tisindices;
4623 
4624         /* get coarse candidates' ranks in pc communicator */
4625         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4626         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4627         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4628           if (!coarse_candidates[i]) {
4629             coarse_candidates[ncoarse_cand++]=i;
4630           }
4631         }
4632         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4633 
4634 
4635         if (pcbddc->dbg_flag) {
4636           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4637           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4638           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4639           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4640           for (i=0;i<ncoarse_cand;i++) {
4641             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4642           }
4643           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4644           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4645         }
4646         /* shift the pattern on coarse candidates */
4647         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4648         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4649         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4650         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4651         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4652         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4653         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4654       }
4655       if (pcbddc->dbg_flag) {
4656         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4657         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4658         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4659         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4660       }
4661     }
4662     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4663     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4664       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4665     } else { /* this is the last level, so use just receiving processes in subcomm */
4666       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4667     }
4668   } else {
4669     if (pcbddc->dbg_flag) {
4670       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4671       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4672       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4673     }
4674     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4675     coarse_mat_is = t_coarse_mat_is;
4676   }
4677 
4678   /* create local to global scatters for coarse problem */
4679   if (compute_vecs) {
4680     PetscInt lrows;
4681     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4682     if (coarse_mat_is) {
4683       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4684     } else {
4685       lrows = 0;
4686     }
4687     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4688     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4689     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4690     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4691     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4692   }
4693   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4694   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4695 
4696   /* set defaults for coarse KSP and PC */
4697   if (multilevel_allowed) {
4698     coarse_ksp_type = KSPRICHARDSON;
4699     coarse_pc_type = PCBDDC;
4700   } else {
4701     coarse_ksp_type = KSPPREONLY;
4702     coarse_pc_type = PCREDUNDANT;
4703   }
4704 
4705   /* print some info if requested */
4706   if (pcbddc->dbg_flag) {
4707     if (!multilevel_allowed) {
4708       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4709       if (multilevel_requested) {
4710         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4711       } else if (pcbddc->max_levels) {
4712         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4713       }
4714       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4715     }
4716   }
4717 
4718   /* create the coarse KSP object only once with defaults */
4719   if (coarse_mat_is) {
4720     MatReuse coarse_mat_reuse;
4721     PetscViewer dbg_viewer = NULL;
4722     if (pcbddc->dbg_flag) {
4723       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4724       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4725     }
4726     if (!pcbddc->coarse_ksp) {
4727       char prefix[256],str_level[16];
4728       size_t len;
4729       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4730       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4731       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4732       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4733       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4734       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4735       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4736       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4737       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4738       /* prefix */
4739       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4740       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4741       if (!pcbddc->current_level) {
4742         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4743         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4744       } else {
4745         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4746         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4747         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4748         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4749         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4750         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4751       }
4752       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4753       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4754       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4755       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4756       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4757       /* allow user customization */
4758       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4759     }
4760     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4761     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4762     if (nisdofs) {
4763       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4764       for (i=0;i<nisdofs;i++) {
4765         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4766       }
4767     }
4768     if (nisneu) {
4769       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4770       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4771     }
4772 
4773     /* get some info after set from options */
4774     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4775     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4776     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4777     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4778       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4779       isbddc = PETSC_FALSE;
4780     }
4781     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4782     if (isredundant) {
4783       KSP inner_ksp;
4784       PC  inner_pc;
4785       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4786       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4787       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4788     }
4789 
4790     /* assemble coarse matrix */
4791     if (coarse_reuse) {
4792       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4793       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4794       coarse_mat_reuse = MAT_REUSE_MATRIX;
4795     } else {
4796       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4797     }
4798     if (isbddc || isnn) {
4799       if (pcbddc->coarsening_ratio > 1) {
4800         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4801           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4802           if (pcbddc->dbg_flag) {
4803             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4804             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4805             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4806             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4807           }
4808         }
4809         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4810       } else {
4811         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4812         coarse_mat = coarse_mat_is;
4813       }
4814     } else {
4815       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4816     }
4817     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4818 
4819     /* propagate symmetry info of coarse matrix */
4820     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4821     if (pc->pmat->symmetric_set) {
4822       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4823     }
4824     if (pc->pmat->hermitian_set) {
4825       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4826     }
4827     if (pc->pmat->spd_set) {
4828       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4829     }
4830     /* set operators */
4831     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4832     if (pcbddc->dbg_flag) {
4833       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4834     }
4835   } else { /* processes non partecipating to coarse solver (if any) */
4836     coarse_mat = 0;
4837   }
4838   ierr = PetscFree(isarray);CHKERRQ(ierr);
4839 #if 0
4840   {
4841     PetscViewer viewer;
4842     char filename[256];
4843     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
4844     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
4845     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4846     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4847     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4848   }
4849 #endif
4850 
4851   /* Compute coarse null space (special handling by BDDC only) */
4852 #if 0
4853   if (pcbddc->NullSpace) {
4854     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4855   }
4856 #endif
4857 
4858   if (pcbddc->coarse_ksp) {
4859     Vec crhs,csol;
4860     PetscBool ispreonly;
4861 
4862     if (CoarseNullSpace) {
4863       if (isbddc) {
4864         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4865       } else {
4866         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4867       }
4868     }
4869     /* setup coarse ksp */
4870     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4871     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4872     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4873     /* hack */
4874     if (!csol) {
4875       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4876     }
4877     if (!crhs) {
4878       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4879     }
4880     /* Check coarse problem if in debug mode or if solving with an iterative method */
4881     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4882     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4883       KSP       check_ksp;
4884       KSPType   check_ksp_type;
4885       PC        check_pc;
4886       Vec       check_vec,coarse_vec;
4887       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4888       PetscInt  its;
4889       PetscBool compute_eigs;
4890       PetscReal *eigs_r,*eigs_c;
4891       PetscInt  neigs;
4892       const char *prefix;
4893 
4894       /* Create ksp object suitable for estimation of extreme eigenvalues */
4895       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4896       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4897       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4898       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4899       if (ispreonly) {
4900         check_ksp_type = KSPPREONLY;
4901         compute_eigs = PETSC_FALSE;
4902       } else {
4903         check_ksp_type = KSPGMRES;
4904         compute_eigs = PETSC_TRUE;
4905       }
4906       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4907       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4908       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4909       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4910       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4911       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4912       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4913       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4914       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4915       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4916       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4917       /* create random vec */
4918       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4919       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4920       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4921       if (CoarseNullSpace) {
4922         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4923       }
4924       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4925       /* solve coarse problem */
4926       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4927       if (CoarseNullSpace) {
4928         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4929       }
4930       /* set eigenvalue estimation if preonly has not been requested */
4931       if (compute_eigs) {
4932         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4933         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4934         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4935         lambda_max = eigs_r[neigs-1];
4936         lambda_min = eigs_r[0];
4937         if (pcbddc->use_coarse_estimates) {
4938           if (lambda_max>lambda_min) {
4939             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4940             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4941           }
4942         }
4943       }
4944 
4945       /* check coarse problem residual error */
4946       if (pcbddc->dbg_flag) {
4947         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4948         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4949         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4950         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4951         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4952         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4953         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4954         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4955         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4956         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4957         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4958         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4959         if (compute_eigs) {
4960           PetscReal lambda_max_s,lambda_min_s;
4961           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4962           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4963           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4964           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4965           for (i=0;i<neigs;i++) {
4966             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4967           }
4968         }
4969         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4970         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4971       }
4972       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4973       if (compute_eigs) {
4974         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4975         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4976       }
4977     }
4978   }
4979   /* print additional info */
4980   if (pcbddc->dbg_flag) {
4981     /* waits until all processes reaches this point */
4982     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4983     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4984     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4985   }
4986 
4987   /* free memory */
4988   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4989   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4990   PetscFunctionReturn(0);
4991 }
4992 
4993 #undef __FUNCT__
4994 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4995 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4996 {
4997   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4998   PC_IS*         pcis = (PC_IS*)pc->data;
4999   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5000   IS             subset,subset_mult,subset_n;
5001   PetscInt       local_size,coarse_size=0;
5002   PetscInt       *local_primal_indices=NULL;
5003   const PetscInt *t_local_primal_indices;
5004   PetscErrorCode ierr;
5005 
5006   PetscFunctionBegin;
5007   /* Compute global number of coarse dofs */
5008   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5009     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5010   }
5011   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5012   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5013   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5014   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5015   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5016   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5017   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5018   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5019   if (local_size != pcbddc->local_primal_size) {
5020     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5021   }
5022   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5023   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5024   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5025   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5026   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5027 
5028   /* check numbering */
5029   if (pcbddc->dbg_flag) {
5030     PetscScalar coarsesum,*array,*array2;
5031     PetscInt    i;
5032     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5033 
5034     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5035     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5036     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5037     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5038     /* counter */
5039     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5040     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5041     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5042     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5043     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5044     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5045 
5046     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5047     for (i=0;i<pcbddc->local_primal_size;i++) {
5048       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5049     }
5050     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5051     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5052     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5053     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5054     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5055     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5056     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5057     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5058     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5059     for (i=0;i<pcis->n;i++) {
5060       if (array[i] != 0.0 && array[i] != array2[i]) {
5061         PetscInt owned = (PetscInt)(array[i]);
5062         PetscInt neigh = (PetscInt)(array2[i]);
5063         set_error = PETSC_TRUE;
5064         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5065       }
5066     }
5067     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5068     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5069     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5070     for (i=0;i<pcis->n;i++) {
5071       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5072     }
5073     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5074     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5075     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5076     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5077     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5078     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5079     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5080       PetscInt *gidxs;
5081 
5082       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5083       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5084       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5085       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5086       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5087       for (i=0;i<pcbddc->local_primal_size;i++) {
5088         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5089       }
5090       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5091       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5092     }
5093     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5094     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5095   }
5096   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5097   /* get back data */
5098   *coarse_size_n = coarse_size;
5099   *local_primal_indices_n = local_primal_indices;
5100   PetscFunctionReturn(0);
5101 }
5102 
5103 #undef __FUNCT__
5104 #define __FUNCT__ "PCBDDCGlobalToLocal"
5105 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5106 {
5107   IS             localis_t;
5108   PetscInt       i,lsize,*idxs,n;
5109   PetscScalar    *vals;
5110   PetscErrorCode ierr;
5111 
5112   PetscFunctionBegin;
5113   /* get indices in local ordering exploiting local to global map */
5114   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5115   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5116   for (i=0;i<lsize;i++) vals[i] = 1.0;
5117   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5118   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5119   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5120   if (idxs) { /* multilevel guard */
5121     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5122   }
5123   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5124   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5125   ierr = PetscFree(vals);CHKERRQ(ierr);
5126   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5127   /* now compute set in local ordering */
5128   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5129   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5130   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5131   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5132   for (i=0,lsize=0;i<n;i++) {
5133     if (PetscRealPart(vals[i]) > 0.5) {
5134       lsize++;
5135     }
5136   }
5137   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5138   for (i=0,lsize=0;i<n;i++) {
5139     if (PetscRealPart(vals[i]) > 0.5) {
5140       idxs[lsize++] = i;
5141     }
5142   }
5143   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5144   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5145   *localis = localis_t;
5146   PetscFunctionReturn(0);
5147 }
5148 
5149 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5150 #undef __FUNCT__
5151 #define __FUNCT__ "PCBDDCMatMult_Private"
5152 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5153 {
5154   PCBDDCChange_ctx change_ctx;
5155   PetscErrorCode   ierr;
5156 
5157   PetscFunctionBegin;
5158   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5159   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5160   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5161   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5162   PetscFunctionReturn(0);
5163 }
5164 
5165 #undef __FUNCT__
5166 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5167 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5168 {
5169   PCBDDCChange_ctx change_ctx;
5170   PetscErrorCode   ierr;
5171 
5172   PetscFunctionBegin;
5173   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5174   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5175   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5176   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5177   PetscFunctionReturn(0);
5178 }
5179 
5180 #undef __FUNCT__
5181 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5182 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5183 {
5184   PC_IS               *pcis=(PC_IS*)pc->data;
5185   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5186   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5187   Mat                 S_j;
5188   PetscInt            *used_xadj,*used_adjncy;
5189   PetscBool           free_used_adj;
5190   PetscErrorCode      ierr;
5191 
5192   PetscFunctionBegin;
5193   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5194   free_used_adj = PETSC_FALSE;
5195   if (pcbddc->sub_schurs_layers == -1) {
5196     used_xadj = NULL;
5197     used_adjncy = NULL;
5198   } else {
5199     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5200       used_xadj = pcbddc->mat_graph->xadj;
5201       used_adjncy = pcbddc->mat_graph->adjncy;
5202     } else if (pcbddc->computed_rowadj) {
5203       used_xadj = pcbddc->mat_graph->xadj;
5204       used_adjncy = pcbddc->mat_graph->adjncy;
5205     } else {
5206       PetscBool      flg_row=PETSC_FALSE;
5207       const PetscInt *xadj,*adjncy;
5208       PetscInt       nvtxs;
5209 
5210       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5211       if (flg_row) {
5212         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5213         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5214         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5215         free_used_adj = PETSC_TRUE;
5216       } else {
5217         pcbddc->sub_schurs_layers = -1;
5218         used_xadj = NULL;
5219         used_adjncy = NULL;
5220       }
5221       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5222     }
5223   }
5224 
5225   /* setup sub_schurs data */
5226   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5227   if (!sub_schurs->use_mumps) {
5228     /* pcbddc->ksp_D up to date only if not using MUMPS */
5229     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5230     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5231   } else {
5232     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5233     PetscBool isseqaij;
5234     if (!pcbddc->use_vertices && reuse_solvers) {
5235       PetscInt n_vertices;
5236 
5237       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5238       reuse_solvers = (PetscBool)!n_vertices;
5239     }
5240     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5241     if (!isseqaij) {
5242       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5243       if (matis->A == pcbddc->local_mat) {
5244         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5245         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5246       } else {
5247         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5248       }
5249     }
5250     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5251   }
5252   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5253 
5254   /* free adjacency */
5255   if (free_used_adj) {
5256     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5257   }
5258   PetscFunctionReturn(0);
5259 }
5260 
5261 #undef __FUNCT__
5262 #define __FUNCT__ "PCBDDCInitSubSchurs"
5263 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5264 {
5265   PC_IS               *pcis=(PC_IS*)pc->data;
5266   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5267   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5268   PCBDDCGraph         graph;
5269   PetscErrorCode      ierr;
5270 
5271   PetscFunctionBegin;
5272   /* attach interface graph for determining subsets */
5273   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5274     IS       verticesIS,verticescomm;
5275     PetscInt vsize,*idxs;
5276 
5277     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5278     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5279     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5280     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5281     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5282     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5283     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5284     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5285     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5286     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5287     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5288 /*
5289     if (pcbddc->dbg_flag) {
5290       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5291     }
5292 */
5293   } else {
5294     graph = pcbddc->mat_graph;
5295   }
5296 
5297   /* sub_schurs init */
5298   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5299 
5300   /* free graph struct */
5301   if (pcbddc->sub_schurs_rebuild) {
5302     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5303   }
5304   PetscFunctionReturn(0);
5305 }
5306 
5307 #undef __FUNCT__
5308 #define __FUNCT__ "PCBDDCCheckOperator"
5309 PetscErrorCode PCBDDCCheckOperator(PC pc)
5310 {
5311   PC_IS               *pcis=(PC_IS*)pc->data;
5312   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5313   PetscErrorCode      ierr;
5314 
5315   PetscFunctionBegin;
5316   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5317     IS             zerodiag = NULL;
5318     Mat            S_j,B0=NULL,B0_B=NULL;
5319     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5320     PetscScalar    norm,p0_check,*array,*array2;
5321     PetscInt       i;
5322 
5323     /* B0 and B0_B */
5324     if (zerodiag) {
5325       IS       dummy;
5326       PetscInt ii[2];
5327 
5328       ii[0] = 0;
5329       ii[1] = pcbddc->B0_ncol;
5330       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
5331       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
5332       ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5333       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5334       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5335     }
5336     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5337     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5338     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5339     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5340     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5341     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5342     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5343     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5344     /* S_j */
5345     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5346     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5347 
5348     /* mimic vector in \widetilde{W}_\Gamma */
5349     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5350     /* continuous in primal space */
5351     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5352     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5353     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5354     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5355     if (zerodiag) {
5356       p0_check = array[pcbddc->local_primal_size-1];
5357     } else {
5358       p0_check = 0;
5359     }
5360     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5361     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5362     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5363     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5364     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5365     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5366     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5367     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5368 
5369     /* assemble rhs for coarse problem */
5370     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5371     /* local with Schur */
5372     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5373     if (zerodiag) {
5374       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5375       array[0] = p0_check;
5376       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5377       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5378     }
5379     /* sum on primal nodes the local contributions */
5380     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5381     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5382     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5383     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5384     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5385     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5386     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5387     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5388     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5389     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5390     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5391     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5392     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5393     /* scale primal nodes (BDDC sums contibutions) */
5394     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5395     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5396     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5397     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5398     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5399     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5400     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5401     /* global: \widetilde{B0}_B w_\Gamma */
5402     if (zerodiag) {
5403       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5404       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5405       pcbddc->benign_p0 = array[0];
5406       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5407     } else {
5408       pcbddc->benign_p0 = 0.;
5409     }
5410     /* BDDC */
5411     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5412     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5413 
5414     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5415     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5416     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5417     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5418     if (pcbddc->benign_p0_lidx >= 0) {
5419       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check));
5420     }
5421 
5422     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5423     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5424     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5425     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5426     ierr = MatDestroy(&B0);CHKERRQ(ierr);
5427     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5428   }
5429   PetscFunctionReturn(0);
5430 }
5431