xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 75c01103f2819f4a39b108c2eabb426c649b60cd)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignCheck"
10 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PC_IS*         pcis = (PC_IS*)(pc->data);
14   IS             dirIS = NULL;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr);
19   if (zerodiag) {
20     Mat            A;
21     Vec            vec3_N;
22     PetscScalar    *vals;
23     const PetscInt *idxs;
24     PetscInt       i,nz;
25 
26     /* p0 */
27     ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr);
28     ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr);
29     ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
30     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
31     for (i=0;i<nz;i++) vals[i] = 1.; /* TODO add quadrature */
32     ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
33     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
34     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
35     /* v_I */
36     ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr);
37     for (i=0;i<nz;i++) vals[i] = 0.;
38     ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
39     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
40     ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
41     for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
42     ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
43     ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
44     if (dirIS) {
45       PetscInt n;
46 
47       ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr);
48       ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr);
49       for (i=0;i<n;i++) vals[i] = 0.;
50       ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
51       ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr);
52     }
53     ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr);
54     ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr);
55     ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr);
56     ierr = VecSet(vec3_N,0.);CHKERRQ(ierr);
57     ierr = MatISGetLocalMat(pc->mat,&A);CHKERRQ(ierr);
58     ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr);
59     ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr);
60     if (PetscAbsScalar(vals[0]) > PETSC_SMALL) { /* TODO: should I add the A-norm in test? */
61       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %f (should be numerically 0.)",PetscAbsScalar(vals[0]));
62     }
63     ierr = PetscFree(vals);CHKERRQ(ierr);
64     ierr = VecDestroy(&vec3_N);CHKERRQ(ierr);
65   }
66   ierr = ISDestroy(&dirIS);CHKERRQ(ierr);
67 
68   /* check PCBDDCBenignGetOrSetP0 */
69   ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr);
70   pcbddc->benign_p0 = -PetscGlobalRank;
71   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr);
72   pcbddc->benign_p0 = 1;
73   ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr);
74   if (pcbddc->benign_p0_gidx >=0 && (PetscInt)PetscRealPart(pcbddc->benign_p0) != -PetscGlobalRank) {
75     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %1.4e instead of %1.4e\n",pcbddc->benign_p0,-PetscGlobalRank);CHKERRQ(ierr);
76   }
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCBenignDetectSaddlePoint"
82 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal)
83 {
84   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
85   IS             pressures,zerodiag;
86   PetscInt       nz,n;
87   PetscBool      sorted,have_null;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
92   ierr = PetscObjectReference((PetscObject)pcbddc->local_mat);CHKERRQ(ierr);
93   pcbddc->benign_original_mat = pcbddc->local_mat;
94   /* if a local info on dofs is present, assumes the last field is represented by "pressures", otherwise, uses only zerodiagonal dofs (ok if the pressure block is all zero) */
95   if (pcbddc->n_ISForDofsLocal) {
96     PetscInt p = pcbddc->n_ISForDofsLocal-1;
97     ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[p]);CHKERRQ(ierr);
98     pressures = pcbddc->ISForDofsLocal[p];
99     ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr);
100     if (!sorted) {
101       ierr = ISSort(pressures);CHKERRQ(ierr);
102     }
103   } else {
104     pressures = NULL;
105   }
106   ierr = MatFindZeroDiagonals(pcbddc->benign_original_mat,&zerodiag);CHKERRQ(ierr);
107   ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr);
108   if (!sorted) {
109     ierr = ISSort(zerodiag);CHKERRQ(ierr);
110   }
111   /*
112      Check if all the pressure dofs have a zero diagonal
113      If not, a change of basis on pressures is not needed
114      since the local Schur complements are SPD
115   */
116   ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr);
117   if (pressures) {
118     PetscInt  np;
119     PetscBool sameis;
120 
121     sameis = PETSC_FALSE;
122     ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr);
123     if (nz == np) {
124       const PetscInt *idxs,*pidxs;
125 
126       ierr = ISGetIndices(pressures,&pidxs);CHKERRQ(ierr);
127       ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
128       ierr = PetscMemcmp(pidxs,idxs,np*sizeof(PetscInt),&sameis);CHKERRQ(ierr);
129       ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
130       ierr = ISRestoreIndices(pressures,&pidxs);CHKERRQ(ierr);
131     }
132     if (!sameis) { /* destroy index sets and set nz to 0 to avoid next code branch */
133       ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
134       nz = 0;
135     }
136     ierr = ISDestroy(&pressures);CHKERRQ(ierr);
137   }
138   if (nz) {
139     IS                zerodiagc;
140     PetscScalar       *array;
141     const PetscInt    *idxs,*idxsc;
142     PetscInt          i,*nnz;
143 
144     /* TODO: add check for shared dofs and raise error */
145     ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
146     ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr);
147     ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr);
148     ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
149     /* local change of basis for pressures */
150     ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
151     ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->benign_original_mat),&pcbddc->benign_change);CHKERRQ(ierr);
152     ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr);
153     ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
154     ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr);
155     for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities */
156     for (i=0;i<nz-1;i++) nnz[idxs[i]] = 2; /* change on pressures */
157     nnz[idxs[nz-1]] = nz; /* last local pressure dof: _0 set */
158     ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr);
159     ierr = PetscFree(nnz);CHKERRQ(ierr);
160     /* set identity on velocities */
161     for (i=0;i<n-nz;i++) {
162       ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr);
163     }
164     /* set change on pressures */
165     for (i=0;i<nz-1;i++) {
166       PetscScalar vals[2];
167       PetscInt    cols[2];
168 
169       /* TODO: add quadrature */
170       cols[0] = idxs[i];
171       cols[1] = idxs[nz-1];
172       vals[0] = 1.;
173       vals[1] = 1.;
174       ierr = MatSetValues(pcbddc->benign_change,1,idxs+i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
175     }
176     ierr = PetscMalloc1(nz,&array);CHKERRQ(ierr);
177     for (i=0;i<nz-1;i++) array[i] = -1.;
178     array[nz-1] = 1.;
179     ierr = MatSetValues(pcbddc->benign_change,1,idxs+nz-1,nz,idxs,array,INSERT_VALUES);CHKERRQ(ierr);
180     ierr = PetscFree(array);CHKERRQ(ierr);
181     ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
182     ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
183     /* TODO: need optimization? */
184     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
185     ierr = MatPtAP(pcbddc->benign_original_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
186     /* store local and global idxs for p0 */
187     pcbddc->benign_p0_lidx = idxs[nz-1];
188     ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,1,&idxs[nz-1],&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
189     ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr);
190     ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr);
191     ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr);
192   } else { /* if nz == 0, destroy local IS */
193     ierr = ISDestroy(&zerodiag);CHKERRQ(ierr);
194   }
195 
196   /* determines if the coarse solver will be singular or not */
197   ierr = MatGetLocalSize(pcbddc->benign_original_mat,&n,NULL);CHKERRQ(ierr);
198   have_null = PETSC_TRUE;
199   if (!zerodiag && n) have_null = PETSC_FALSE;
200   ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
201   *zerodiaglocal = zerodiag;
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "PCBDDCBenignGetOrSetP0"
207 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
208 {
209   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
210   PetscErrorCode ierr;
211 
212   PetscFunctionBegin;
213   if (!pcbddc->benign_sf) {
214     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
215     if (pcbddc->benign_p0_gidx >= 0) {
216       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
217     } else {
218       ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,0,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
219     }
220   }
221   if (get) { /* use SF to get values */
222     PetscScalar *array;
223 
224     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
225     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
226     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
227     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
228   } else { /* use VecSetValue */
229     if (pcbddc->benign_p0_gidx >= 0) {
230       ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
231     }
232     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
233     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
240 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
241 {
242   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
243   PetscErrorCode ierr;
244 
245   PetscFunctionBegin;
246   /* TODO: add error checking
247     - avoid nested pop (or push) calls.
248     - cannot push before pop.
249     - cannot call this if pcbddc->local_mat is NULL
250   */
251   if (pcbddc->benign_p0_lidx < 0) {
252     PetscFunctionReturn(0);
253   }
254   if (pop) {
255     const PetscInt    *cB0_cols;
256     PetscInt          cB0_ncol;
257     const PetscScalar *cB0_vals;
258 
259     /* extract B_0 */
260     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
261     pcbddc->B0_ncol = cB0_ncol;
262     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
263     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
264     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
265     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
266     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
267     /* remove rows and cols from local problem */
268     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
269     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
270   } else { /* push */
271     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
272     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
273     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
274     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
275     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
276   }
277   PetscFunctionReturn(0);
278 }
279 
280 #undef __FUNCT__
281 #define __FUNCT__ "PCBDDCAdaptiveSelection"
282 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
283 {
284   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
285   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
286   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
287   PetscBLASInt    *B_iwork,*B_ifail;
288   PetscScalar     *work,lwork;
289   PetscScalar     *St,*S,*eigv;
290   PetscScalar     *Sarray,*Starray;
291   PetscReal       *eigs,thresh;
292   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
293   PetscBool       allocated_S_St;
294 #if defined(PETSC_USE_COMPLEX)
295   PetscReal       *rwork;
296 #endif
297   PetscErrorCode  ierr;
298 
299   PetscFunctionBegin;
300   if (!sub_schurs->use_mumps) {
301     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
302   }
303 
304   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
305     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
306   }
307 
308   if (pcbddc->dbg_flag) {
309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
310     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
311     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
312     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
313   }
314 
315   if (pcbddc->dbg_flag) {
316     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
317   }
318 
319   /* max size of subsets */
320   mss = 0;
321   for (i=0;i<sub_schurs->n_subs;i++) {
322     PetscInt subset_size;
323 
324     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
325     mss = PetscMax(mss,subset_size);
326   }
327 
328   /* min/max and threshold */
329   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
330   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
331   nmax = PetscMax(nmin,nmax);
332   allocated_S_St = PETSC_FALSE;
333   if (nmin) {
334     allocated_S_St = PETSC_TRUE;
335   }
336 
337   /* allocate lapack workspace */
338   cum = cum2 = 0;
339   maxneigs = 0;
340   for (i=0;i<sub_schurs->n_subs;i++) {
341     PetscInt n,subset_size;
342 
343     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
344     n = PetscMin(subset_size,nmax);
345     cum += subset_size;
346     cum2 += subset_size*n;
347     maxneigs = PetscMax(maxneigs,n);
348   }
349   if (mss) {
350     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
351       PetscBLASInt B_itype = 1;
352       PetscBLASInt B_N = mss;
353       PetscReal    zero = 0.0;
354       PetscReal    eps = 0.0; /* dlamch? */
355 
356       B_lwork = -1;
357       S = NULL;
358       St = NULL;
359       eigs = NULL;
360       eigv = NULL;
361       B_iwork = NULL;
362       B_ifail = NULL;
363 #if defined(PETSC_USE_COMPLEX)
364       rwork = NULL;
365 #endif
366       thresh = 1.0;
367       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
368 #if defined(PETSC_USE_COMPLEX)
369       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
370 #else
371       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
372 #endif
373       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
374       ierr = PetscFPTrapPop();CHKERRQ(ierr);
375     } else {
376         /* TODO */
377     }
378   } else {
379     lwork = 0;
380   }
381 
382   nv = 0;
383   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
384     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
385   }
386   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
387   if (allocated_S_St) {
388     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
389   }
390   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
391 #if defined(PETSC_USE_COMPLEX)
392   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
393 #endif
394   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
395                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
396                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
397                       nv+cum,&pcbddc->adaptive_constraints_idxs,
398                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
399   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
400 
401   maxneigs = 0;
402   cum = cum2 = cumarray = 0;
403   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
404   pcbddc->adaptive_constraints_data_ptr[0] = 0;
405   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
406     const PetscInt *idxs;
407 
408     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
409     for (cum=0;cum<nv;cum++) {
410       pcbddc->adaptive_constraints_n[cum] = 1;
411       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
412       pcbddc->adaptive_constraints_data[cum] = 1.0;
413       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
414       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
415     }
416     cum2 = cum;
417     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
418   }
419 
420   if (mss) { /* multilevel */
421     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
422     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
423   }
424 
425   for (i=0;i<sub_schurs->n_subs;i++) {
426 
427     const PetscInt *idxs;
428     PetscReal      infty = PETSC_MAX_REAL;
429     PetscInt       j,subset_size,eigs_start = 0;
430     PetscBLASInt   B_N;
431     PetscBool      same_data = PETSC_FALSE;
432 
433     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
434     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
435     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
436       if (sub_schurs->is_hermitian) {
437         PetscInt j,k;
438         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
439           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
440           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
441         }
442         for (j=0;j<subset_size;j++) {
443           for (k=j;k<subset_size;k++) {
444             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
445             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
446           }
447         }
448       } else {
449         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
450         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
451       }
452     } else {
453       S = Sarray + cumarray;
454       St = Starray + cumarray;
455     }
456 
457     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
458     /* see if we can save some work */
459     if (sub_schurs->n_subs == 1) {
460       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
461     }
462 
463     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
464       B_neigs = 0;
465     } else {
466       /* Threshold: this is an heuristic for edges */
467       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
468 
469       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
470         PetscBLASInt B_itype = 1;
471         PetscBLASInt B_IL, B_IU;
472         PetscReal    eps = -1.0; /* dlamch? */
473         PetscInt     nmin_s;
474 
475         if (pcbddc->dbg_flag) {
476           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
477         }
478 
479         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
480         if (thresh > 1.+PETSC_SMALL) {
481 
482           /* ask for eigenvalues larger than thresh */
483 #if defined(PETSC_USE_COMPLEX)
484           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
485 #else
486           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
487 #endif
488         } else {
489           B_IU = PetscMax(1,PetscMin(B_N,nmax));
490           B_IL = 1;
491 #if defined(PETSC_USE_COMPLEX)
492           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
493 #else
494           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
495 #endif
496         }
497         ierr = PetscFPTrapPop();CHKERRQ(ierr);
498         if (B_ierr) {
499           if (B_ierr < 0 ) {
500             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
501           } else if (B_ierr <= B_N) {
502             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
503           } else {
504             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
505           }
506         }
507 
508         if (B_neigs > nmax) {
509           if (pcbddc->dbg_flag) {
510             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
511           }
512           eigs_start = B_neigs -nmax;
513           B_neigs = nmax;
514         }
515 
516         nmin_s = PetscMin(nmin,B_N);
517         if (B_neigs < nmin_s) {
518           PetscBLASInt B_neigs2;
519 
520           B_IU = B_N - B_neigs;
521           B_IL = B_N - nmin_s + 1;
522           if (pcbddc->dbg_flag) {
523             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
524           }
525           if (sub_schurs->is_hermitian) {
526             PetscInt j;
527             for (j=0;j<subset_size;j++) {
528               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
529             }
530             for (j=0;j<subset_size;j++) {
531               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
532             }
533           } else {
534             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
535             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
536           }
537           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
538 #if defined(PETSC_USE_COMPLEX)
539           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
540 #else
541           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
542 #endif
543           ierr = PetscFPTrapPop();CHKERRQ(ierr);
544           B_neigs += B_neigs2;
545         }
546         if (B_ierr) {
547           if (B_ierr < 0 ) {
548             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
549           } else if (B_ierr <= B_N) {
550             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
551           } else {
552             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
553           }
554         }
555         if (pcbddc->dbg_flag) {
556           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
557           for (j=0;j<B_neigs;j++) {
558             if (eigs[j] == 0.0) {
559               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
560             } else {
561               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
562             }
563           }
564         }
565       } else {
566           /* TODO */
567       }
568     }
569     maxneigs = PetscMax(B_neigs,maxneigs);
570     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
571     if (B_neigs) {
572       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
573 
574       if (pcbddc->dbg_flag > 1) {
575         PetscInt ii;
576         for (ii=0;ii<B_neigs;ii++) {
577           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
578           for (j=0;j<B_N;j++) {
579 #if defined(PETSC_USE_COMPLEX)
580             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
581             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
582             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
583 #else
584             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
585 #endif
586           }
587         }
588       }
589 #if 0
590       for (j=0;j<B_neigs;j++) {
591         PetscBLASInt Blas_N,Blas_one = 1.0;
592         PetscScalar norm;
593         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
594         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
595                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
596         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
597           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
598         } else {
599           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
600         }
601         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
602       }
603 #endif
604       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
605       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
606       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
607       cum++;
608     }
609     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
610     /* shift for next computation */
611     cumarray += subset_size*subset_size;
612   }
613   if (pcbddc->dbg_flag) {
614     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
615   }
616 
617   if (mss) {
618     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
619     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
620     /* destroy matrices (junk) */
621     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
622     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
623   }
624   if (allocated_S_St) {
625     ierr = PetscFree2(S,St);CHKERRQ(ierr);
626   }
627   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
628 #if defined(PETSC_USE_COMPLEX)
629   ierr = PetscFree(rwork);CHKERRQ(ierr);
630 #endif
631   if (pcbddc->dbg_flag) {
632     PetscInt maxneigs_r;
633     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
634     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "PCBDDCSetUpSolvers"
641 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
642 {
643   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
644   PetscScalar    *coarse_submat_vals;
645   PetscErrorCode ierr;
646 
647   PetscFunctionBegin;
648   /* Setup local scatters R_to_B and (optionally) R_to_D */
649   /* PCBDDCSetUpLocalWorkVectors should be called first! */
650   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
651 
652   /* Setup local neumann solver ksp_R */
653   /* PCBDDCSetUpLocalScatters should be called first! */
654   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
655 
656   /* Change global null space passed in by the user if change of basis has been requested */
657   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
658     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
659   }
660 
661   /*
662      Setup local correction and local part of coarse basis.
663      Gives back the dense local part of the coarse matrix in column major ordering
664   */
665   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
666 
667   /* Compute total number of coarse nodes and setup coarse solver */
668   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
669 
670   /* free */
671   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 #undef __FUNCT__
676 #define __FUNCT__ "PCBDDCResetCustomization"
677 PetscErrorCode PCBDDCResetCustomization(PC pc)
678 {
679   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
680   PetscErrorCode ierr;
681 
682   PetscFunctionBegin;
683   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
684   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
685   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
686   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
687   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
688   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
689   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
690   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
691   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
692   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
693   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "PCBDDCResetTopography"
699 PetscErrorCode PCBDDCResetTopography(PC pc)
700 {
701   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
702   PetscErrorCode ierr;
703 
704   PetscFunctionBegin;
705   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
706   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
707   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
708   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
709   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "PCBDDCResetSolvers"
715 PetscErrorCode PCBDDCResetSolvers(PC pc)
716 {
717   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
718   PetscScalar    *array;
719   PetscErrorCode ierr;
720 
721   PetscFunctionBegin;
722   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
723   if (pcbddc->coarse_phi_B) {
724     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
725     ierr = PetscFree(array);CHKERRQ(ierr);
726   }
727   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
728   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
729   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
730   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
731   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
732   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
733   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
734   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
735   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
736   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
737   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
738   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
739   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
740   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
741   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
742   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
743   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
744   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
745   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
746   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
747   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
748   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
749   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
750   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
751   ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr);
752   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
753   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
754   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
760 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
761 {
762   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
763   PC_IS          *pcis = (PC_IS*)pc->data;
764   VecType        impVecType;
765   PetscInt       n_constraints,n_R,old_size,n_benign;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   if (!pcbddc->ConstraintMatrix) {
770     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
771   }
772   /* get sizes */
773   n_benign = 0;
774   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
775   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
776   n_R = pcis->n - pcbddc->n_vertices;
777   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
778   /* local work vectors (try to avoid unneeded work)*/
779   /* R nodes */
780   old_size = -1;
781   if (pcbddc->vec1_R) {
782     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
783   }
784   if (n_R != old_size) {
785     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
786     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
787     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
788     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
789     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
790     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
791   }
792   /* local primal dofs */
793   old_size = -1;
794   if (pcbddc->vec1_P) {
795     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
796   }
797   if (pcbddc->local_primal_size != old_size) {
798     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
799     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
800     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
801     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
802   }
803   /* local explicit constraints */
804   old_size = -1;
805   if (pcbddc->vec1_C) {
806     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
807   }
808   if (n_constraints && n_constraints != old_size) {
809     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
810     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
811     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
812     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
813   }
814   PetscFunctionReturn(0);
815 }
816 
817 #undef __FUNCT__
818 #define __FUNCT__ "PCBDDCSetUpCorrection"
819 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
820 {
821   PetscErrorCode  ierr;
822   /* pointers to pcis and pcbddc */
823   PC_IS*          pcis = (PC_IS*)pc->data;
824   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
825   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
826   /* submatrices of local problem */
827   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
828   /* submatrices of benign trick */
829   Mat             B0_V = NULL;
830   /* submatrices of local coarse problem */
831   Mat             S_VV,S_CV,S_VC,S_CC;
832   /* working matrices */
833   Mat             C_CR;
834   /* additional working stuff */
835   PC              pc_R;
836   Mat             F,B0 = NULL;
837   PetscBool       isLU,isCHOL,isILU;
838 
839   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
840   PetscScalar     *work;
841   PetscInt        *idx_V_B;
842   PetscInt        n,n_vertices,n_constraints,n_benign,p0_lidx_I = 0;
843   PetscInt        i,n_R,n_D,n_B;
844   PetscBool       unsymmetric_check;
845   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
846   MatType         impMatType;
847   /* some shortcuts to scalars */
848   PetscScalar     one=1.0,m_one=-1.0;
849 
850   PetscFunctionBegin;
851   n_benign = 0;
852   if (pcbddc->benign_p0_lidx >= 0) n_benign = 1;
853   n_vertices = pcbddc->n_vertices;
854   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
855   /* Set Non-overlapping dimensions */
856   n_B = pcis->n_B;
857   n_D = pcis->n - n_B;
858   n_R = pcis->n - n_vertices;
859 
860   /* Set types for local objects needed by BDDC precondtioner */
861   impMatType = MATSEQDENSE;
862 
863   /* vertices in boundary numbering */
864   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
865   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
866   if (i != n_vertices) {
867     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
868   }
869 
870   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
871   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
872   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
873   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
874   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
875   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
876   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
877   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
878   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
879   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
880 
881   unsymmetric_check = PETSC_FALSE;
882   /* allocate workspace */
883   n = 0;
884   if (n_constraints) {
885     n += n_R*n_constraints;
886   }
887   if (n_vertices) {
888     n = PetscMax(2*n_R*n_vertices,n);
889     n = PetscMax((n_R+n_B)*n_vertices,n);
890   }
891   if (!pcbddc->symmetric_primal) {
892     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
893     unsymmetric_check = PETSC_TRUE;
894   }
895   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
896 
897   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
898   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
899   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
900   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
901   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
902   if (isLU || isILU || isCHOL) {
903     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
904   } else if (sub_schurs->reuse_mumps) {
905     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
906     MatFactorType type;
907 
908     F = reuse_mumps->F;
909     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
910     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
911   } else {
912     F = NULL;
913   }
914 
915   /* Precompute stuffs needed for preprocessing and application of BDDC*/
916   if (n_constraints) {
917     Mat         M1,M2,M3;
918     Mat         auxmat;
919     IS          is_aux;
920     PetscScalar *array,*array2;
921 
922     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
923     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
924 
925     /* Extract constraints on R nodes: C_{CR}  */
926     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
927     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
928     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
929 
930     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
931     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
932     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
933     for (i=0;i<n_constraints;i++) {
934       const PetscScalar *row_cmat_values;
935       const PetscInt    *row_cmat_indices;
936       PetscInt          size_of_constraint,j;
937 
938       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
939       for (j=0;j<size_of_constraint;j++) {
940         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
941       }
942       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
943     }
944     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
945     if (F) {
946       Mat B;
947 
948       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
949       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
950       ierr = MatDestroy(&B);CHKERRQ(ierr);
951     } else {
952       PetscScalar *marr;
953 
954       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
955       for (i=0;i<n_constraints;i++) {
956         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
957         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
958         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
959         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
960         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
961       }
962       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
963     }
964     if (!pcbddc->switch_static) {
965       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
966       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
967       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
968       for (i=0;i<n_constraints;i++) {
969         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
970         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
971         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
974         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
975       }
976       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
977       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
978       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
979     } else {
980       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
981       pcbddc->local_auxmat2 = local_auxmat2_R;
982       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
983     }
984     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
985     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
986     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
987     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
988     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
989     if (isCHOL) {
990       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
991     } else {
992       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
993     }
994     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
995     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
996     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
997     ierr = MatDestroy(&M2);CHKERRQ(ierr);
998     ierr = MatDestroy(&M3);CHKERRQ(ierr);
999     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
1000     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
1001     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
1002     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
1003     ierr = MatDestroy(&M1);CHKERRQ(ierr);
1004   }
1005   /* Get submatrices from subdomain matrix */
1006   if (n_benign) {
1007     IS        dummy;
1008     Mat       B0_R;
1009     PetscReal norm;
1010     PetscInt  ii[2];
1011 
1012     ii[0] = 0;
1013     ii[1] = pcbddc->B0_ncol;
1014     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
1015     ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
1016     ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
1017     ierr = MatNorm(B0_R,NORM_INFINITY,&norm);CHKERRQ(ierr);
1018     if (norm > PETSC_SMALL) {
1019       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! ||B0_R|| = %f (should be numerically 0.)",norm);
1020     }
1021     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1022     ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1023   }
1024 
1025   if (n_vertices) {
1026     IS is_aux;
1027 
1028     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
1029       IS tis;
1030 
1031       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
1032       ierr = ISSort(tis);CHKERRQ(ierr);
1033       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
1034       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1035     } else {
1036       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
1037     }
1038     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
1039     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
1040     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
1041     if (n_benign) {
1042       IS dummy;
1043 
1044       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
1045       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
1046       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
1047     }
1048     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
1049   }
1050 
1051   /* Matrix of coarse basis functions (local) */
1052   if (pcbddc->coarse_phi_B) {
1053     PetscInt on_B,on_primal,on_D=n_D;
1054     if (pcbddc->coarse_phi_D) {
1055       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
1056     }
1057     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
1058     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
1059       PetscScalar *marray;
1060 
1061       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
1062       ierr = PetscFree(marray);CHKERRQ(ierr);
1063       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1064       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1065       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1066       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1067     }
1068   }
1069 
1070   if (!pcbddc->coarse_phi_B) {
1071     PetscScalar *marray;
1072 
1073     n = n_B*pcbddc->local_primal_size;
1074     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1075       n += n_D*pcbddc->local_primal_size;
1076     }
1077     if (!pcbddc->symmetric_primal) {
1078       n *= 2;
1079     }
1080     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
1081     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
1082     n = n_B*pcbddc->local_primal_size;
1083     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1084       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
1085       n += n_D*pcbddc->local_primal_size;
1086     }
1087     if (!pcbddc->symmetric_primal) {
1088       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
1089       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1090         n = n_B*pcbddc->local_primal_size;
1091         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
1092       }
1093     } else {
1094       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
1095       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
1096       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1097         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
1098         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
1099       }
1100     }
1101   }
1102 
1103   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
1104   if (n_benign && (pcbddc->switch_static || pcbddc->dbg_flag)) {
1105     const PetscInt *idxs;
1106 
1107     ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1108     ierr = PetscFindInt(pcbddc->benign_p0_lidx,pcis->n-pcis->n_B,idxs,&p0_lidx_I);CHKERRQ(ierr);
1109     ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr);
1110   }
1111 
1112   /* vertices */
1113   if (n_vertices) {
1114 
1115     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
1116 
1117     if (n_R) {
1118       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
1119       PetscBLASInt B_N,B_one = 1;
1120       PetscScalar  *x,*y;
1121       PetscBool    isseqaij;
1122 
1123       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
1124       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
1125       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
1126       if (F) { /* TODO could be optimized for symmetric problems */
1127         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
1128       } else {
1129         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
1130         for (i=0;i<n_vertices;i++) {
1131           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
1132           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
1133           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1134           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1135           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1136         }
1137         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
1138       }
1139       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1140       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
1141       if (n_constraints) {
1142         Mat B;
1143 
1144         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
1145         for (i=0;i<n_vertices;i++) {
1146           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1147           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
1148           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1151           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1152         }
1153         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1154         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
1155         ierr = MatDestroy(&B);CHKERRQ(ierr);
1156         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
1157         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1158         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
1159         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
1160         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
1161         ierr = MatDestroy(&B);CHKERRQ(ierr);
1162       }
1163       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1164       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
1165         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
1166       }
1167       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
1168       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
1169       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
1170       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
1171       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
1172       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
1173       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
1174       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
1175       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1176       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
1177     } else {
1178       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1179     }
1180     if (n_benign) {
1181       const PetscScalar *vals;
1182       const PetscInt    *idxs;
1183       PetscInt          n,j;
1184 
1185       ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1186       for (j=0;j<n;j++) {
1187         coarse_submat_vals[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+idxs[j]] = vals[j];
1188         coarse_submat_vals[(idxs[j]+1)*pcbddc->local_primal_size-1] = vals[j];
1189       }
1190       ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
1191     }
1192     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
1193 
1194     /* coarse basis functions */
1195     for (i=0;i<n_vertices;i++) {
1196       PetscScalar *y;
1197 
1198       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1199       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1200       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1201       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1202       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1203       y[n_B*i+idx_V_B[i]] = 1.0;
1204       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1205       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1206 
1207       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1208         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1209         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1210         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1211         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1213         if (n_benign) y[n_D*i+p0_lidx_I] = 0.0;
1214         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1215       }
1216       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1217     }
1218     /* if n_R == 0 the object is not destroyed */
1219     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1220   }
1221 
1222   if (n_constraints) {
1223     Mat B;
1224 
1225     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1226     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1227     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1228     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1229     if (n_vertices) {
1230       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1231         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1232       } else {
1233         Mat S_VCt;
1234 
1235         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1236         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1237         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1238       }
1239     }
1240     ierr = MatDestroy(&B);CHKERRQ(ierr);
1241     /* coarse basis functions */
1242     for (i=0;i<n_constraints;i++) {
1243       PetscScalar *y;
1244 
1245       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1246       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1247       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1248       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1249       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1251       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1252       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1253         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1254         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1255         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1257         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1258         if (n_benign) y[n_D*(i+n_vertices)+p0_lidx_I] = 0.0;
1259         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1260       }
1261       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1262     }
1263   }
1264   if (n_constraints) {
1265     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1266   }
1267 
1268   ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1269 
1270   /* compute other basis functions for non-symmetric problems */
1271   if (!pcbddc->symmetric_primal) {
1272 
1273     if (n_constraints) {
1274       Mat S_CCT,B_C;
1275 
1276       /* this is a lazy thing */
1277       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1278       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1279       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1280       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1281       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1282       if (n_vertices) {
1283         Mat B_V,S_VCT;
1284 
1285         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1286         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1287         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1288         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1289         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1290       }
1291       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1292     } else { /* if there are no constraints, reset work */
1293       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1294     }
1295     if (n_vertices && n_R) {
1296       Mat          A_VRT;
1297       PetscScalar  *marray;
1298       PetscBLASInt B_N,B_one = 1;
1299 
1300       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1301       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1302       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1303       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1304       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1305       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1306       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1307     }
1308 
1309     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1310       for (i=0;i<pcbddc->local_primal_size;i++) {
1311         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1312         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1313         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1314         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1315         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1316       }
1317     } else {
1318       for (i=0;i<pcbddc->local_primal_size;i++) {
1319         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1320         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1321         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1322         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1323         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1324       }
1325     }
1326     /* coarse basis functions */
1327     for (i=0;i<pcbddc->local_primal_size;i++) {
1328       PetscScalar *y;
1329 
1330       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1331       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1332       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1333       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335       if (i<n_vertices) {
1336         y[n_B*i+idx_V_B[i]] = 1.0;
1337       }
1338       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1339       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1340 
1341       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1342         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1343         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1344         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1346         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1347         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1348       }
1349       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1350     }
1351   }
1352   /* free memory */
1353   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1354   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1355   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1356   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1357   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1358   ierr = PetscFree(work);CHKERRQ(ierr);
1359   if (n_vertices) {
1360     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1361   }
1362   if (n_constraints) {
1363     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1364   }
1365   /* Checking coarse_sub_mat and coarse basis functios */
1366   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1367   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1368   if (pcbddc->dbg_flag) {
1369     Mat         coarse_sub_mat;
1370     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1371     Mat         coarse_phi_D,coarse_phi_B;
1372     Mat         coarse_psi_D,coarse_psi_B;
1373     Mat         A_II,A_BB,A_IB,A_BI;
1374     Mat         C_B,CPHI;
1375     IS          is_dummy;
1376     Vec         mones;
1377     MatType     checkmattype=MATSEQAIJ;
1378     PetscReal   real_value;
1379 
1380     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1381     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1382     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1383     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1384     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1385     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1386     if (unsymmetric_check) {
1387       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1388       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1389     }
1390     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1391 
1392     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1393     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1394     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1395     if (unsymmetric_check) {
1396       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1397       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1398       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1399       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1400       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1401       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1402       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1403       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1404       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1405       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1406       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1407       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1408     } else {
1409       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1410       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1411       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1412       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1413       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1414       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1415       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1416       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1417     }
1418     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1419     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1420     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1421     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1422     if (n_benign) {
1423       Mat B0_I,B0_B,B0_BPHI,B0_IPHI;
1424       PetscScalar *data,*data2;
1425 
1426       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&is_dummy);CHKERRQ(ierr);
1427       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
1428       ierr = MatGetSubMatrix(B0,is_dummy,pcis->is_I_local,MAT_INITIAL_MATRIX,&B0_I);
1429       ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr);
1430       ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_REUSE_MATRIX,&B0_BPHI);CHKERRQ(ierr);
1431       ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr);
1432       ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr);
1433       for (i=0;i<pcbddc->local_primal_size;i++) {
1434         data[(pcbddc->local_primal_size-1)*pcbddc->local_primal_size+i] += data2[i];
1435         data[(i+1)*pcbddc->local_primal_size-1] += data2[i];
1436       }
1437       ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr);
1438       ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr);
1439       ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
1440       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1441       ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr);
1442       ierr = MatMatMult(B0_I,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&B0_IPHI);CHKERRQ(ierr);
1443       ierr = MatDestroy(&B0_I);CHKERRQ(ierr);
1444       ierr = MatNorm(B0_IPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1445       ierr = MatDestroy(&B0_IPHI);CHKERRQ(ierr);
1446     }
1447 #if 0
1448   {
1449     PetscViewer viewer;
1450     char filename[256];
1451     sprintf(filename,"proj_local_coarse_mat%d.m",PetscGlobalRank);
1452     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
1453     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1454     ierr = MatView(TM1,viewer);CHKERRQ(ierr);
1455     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
1456   }
1457 #endif
1458     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1459     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1460     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1461     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1462 
1463     /* check constraints */
1464     if (!n_benign) {
1465       ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1466       ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1467       ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1468       ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1469       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1470       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1471       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1472       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1473       if (unsymmetric_check) {
1474         ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1475         ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1476         ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1477         ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1478         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1479       }
1480       ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1481       ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1482       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1483       ierr = VecDestroy(&mones);CHKERRQ(ierr);
1484     }
1485     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1486     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1487     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1488     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1489     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1490     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1491     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1492     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1493     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1494     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1495     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1496     if (unsymmetric_check) {
1497       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1498       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1499     }
1500     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1501   }
1502   ierr = MatDestroy(&B0);CHKERRQ(ierr);
1503   /* get back data */
1504   *coarse_submat_vals_n = coarse_submat_vals;
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1510 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1511 {
1512   Mat            *work_mat;
1513   IS             isrow_s,iscol_s;
1514   PetscBool      rsorted,csorted;
1515   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1516   PetscErrorCode ierr;
1517 
1518   PetscFunctionBegin;
1519   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1520   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1521   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1522   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1523 
1524   if (!rsorted) {
1525     const PetscInt *idxs;
1526     PetscInt *idxs_sorted,i;
1527 
1528     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1529     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1530     for (i=0;i<rsize;i++) {
1531       idxs_perm_r[i] = i;
1532     }
1533     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1534     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1535     for (i=0;i<rsize;i++) {
1536       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1537     }
1538     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1539     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1540   } else {
1541     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1542     isrow_s = isrow;
1543   }
1544 
1545   if (!csorted) {
1546     if (isrow == iscol) {
1547       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1548       iscol_s = isrow_s;
1549     } else {
1550       const PetscInt *idxs;
1551       PetscInt *idxs_sorted,i;
1552 
1553       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1554       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1555       for (i=0;i<csize;i++) {
1556         idxs_perm_c[i] = i;
1557       }
1558       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1559       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1560       for (i=0;i<csize;i++) {
1561         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1562       }
1563       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1564       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1565     }
1566   } else {
1567     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1568     iscol_s = iscol;
1569   }
1570 
1571   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1572 
1573   if (!rsorted || !csorted) {
1574     Mat      new_mat;
1575     IS       is_perm_r,is_perm_c;
1576 
1577     if (!rsorted) {
1578       PetscInt *idxs_r,i;
1579       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1580       for (i=0;i<rsize;i++) {
1581         idxs_r[idxs_perm_r[i]] = i;
1582       }
1583       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1584       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1585     } else {
1586       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1587     }
1588     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1589 
1590     if (!csorted) {
1591       if (isrow_s == iscol_s) {
1592         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1593         is_perm_c = is_perm_r;
1594       } else {
1595         PetscInt *idxs_c,i;
1596         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1597         for (i=0;i<csize;i++) {
1598           idxs_c[idxs_perm_c[i]] = i;
1599         }
1600         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1601         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1602       }
1603     } else {
1604       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1605     }
1606     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1607 
1608     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1609     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1610     work_mat[0] = new_mat;
1611     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1612     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1613   }
1614 
1615   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1616   *B = work_mat[0];
1617   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1618   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1619   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1625 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1626 {
1627   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1628   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1629   Mat            new_mat;
1630   IS             is_local,is_global;
1631   PetscInt       local_size;
1632   PetscBool      isseqaij;
1633   PetscErrorCode ierr;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1637   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1638   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1639   ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr);
1640   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1641   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1642   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1643 
1644   /* check */
1645   if (pcbddc->dbg_flag) {
1646     Vec       x,x_change;
1647     PetscReal error;
1648 
1649     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1650     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1651     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1652     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1653     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1654     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1655     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1657     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1658     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1659     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1660     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1661     ierr = VecDestroy(&x);CHKERRQ(ierr);
1662     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1663   }
1664 
1665   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1666   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1667   if (isseqaij) {
1668     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1669   } else {
1670     Mat work_mat;
1671     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1672     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1673     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1674   }
1675   if (matis->A->symmetric_set) {
1676     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1677 #if !defined(PETSC_USE_COMPLEX)
1678     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1679 #endif
1680   }
1681   /*
1682   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1683   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1684   */
1685   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1691 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1692 {
1693   PC_IS*          pcis = (PC_IS*)(pc->data);
1694   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1695   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1696   PetscInt        *idx_R_local=NULL;
1697   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1698   PetscInt        vbs,bs;
1699   PetscBT         bitmask=NULL;
1700   PetscErrorCode  ierr;
1701 
1702   PetscFunctionBegin;
1703   /*
1704     No need to setup local scatters if
1705       - primal space is unchanged
1706         AND
1707       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1708         AND
1709       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1710   */
1711   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1712     PetscFunctionReturn(0);
1713   }
1714   /* destroy old objects */
1715   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1716   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1717   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1718   /* Set Non-overlapping dimensions */
1719   n_B = pcis->n_B;
1720   n_D = pcis->n - n_B;
1721   n_vertices = pcbddc->n_vertices;
1722 
1723   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1724 
1725   /* create auxiliary bitmask and allocate workspace */
1726   if (!sub_schurs->reuse_mumps) {
1727     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1728     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1729     for (i=0;i<n_vertices;i++) {
1730       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1731     }
1732 
1733     for (i=0, n_R=0; i<pcis->n; i++) {
1734       if (!PetscBTLookup(bitmask,i)) {
1735         idx_R_local[n_R++] = i;
1736       }
1737     }
1738   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1739     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1740 
1741     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1742     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1743   }
1744 
1745   /* Block code */
1746   vbs = 1;
1747   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1748   if (bs>1 && !(n_vertices%bs)) {
1749     PetscBool is_blocked = PETSC_TRUE;
1750     PetscInt  *vary;
1751     if (!sub_schurs->reuse_mumps) {
1752       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1753       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1754       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1755       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1756       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1757       for (i=0; i<pcis->n/bs; i++) {
1758         if (vary[i]!=0 && vary[i]!=bs) {
1759           is_blocked = PETSC_FALSE;
1760           break;
1761         }
1762       }
1763       ierr = PetscFree(vary);CHKERRQ(ierr);
1764     } else {
1765       /* Verify directly the R set */
1766       for (i=0; i<n_R/bs; i++) {
1767         PetscInt j,node=idx_R_local[bs*i];
1768         for (j=1; j<bs; j++) {
1769           if (node != idx_R_local[bs*i+j]-j) {
1770             is_blocked = PETSC_FALSE;
1771             break;
1772           }
1773         }
1774       }
1775     }
1776     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1777       vbs = bs;
1778       for (i=0;i<n_R/vbs;i++) {
1779         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1780       }
1781     }
1782   }
1783   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1784   if (sub_schurs->reuse_mumps) {
1785     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1786 
1787     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1788     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1789     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1790     reuse_mumps->is_R = pcbddc->is_R_local;
1791   } else {
1792     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1793   }
1794 
1795   /* print some info if requested */
1796   if (pcbddc->dbg_flag) {
1797     PetscInt benign = 0;
1798 
1799     if (pcbddc->benign_p0_lidx >= 0) benign = 1;
1800     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1801     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1802     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1803     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1804     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1806     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1807   }
1808 
1809   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1810   if (!sub_schurs->reuse_mumps) {
1811     IS       is_aux1,is_aux2;
1812     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1813 
1814     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1815     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1816     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1817     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1818     for (i=0; i<n_D; i++) {
1819       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1820     }
1821     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1822     for (i=0, j=0; i<n_R; i++) {
1823       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1824         aux_array1[j++] = i;
1825       }
1826     }
1827     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1828     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1829     for (i=0, j=0; i<n_B; i++) {
1830       if (!PetscBTLookup(bitmask,is_indices[i])) {
1831         aux_array2[j++] = i;
1832       }
1833     }
1834     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1835     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1836     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1837     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1838     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1839 
1840     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1841       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1842       for (i=0, j=0; i<n_R; i++) {
1843         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1844           aux_array1[j++] = i;
1845         }
1846       }
1847       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1848       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1849       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1850     }
1851     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1852     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1853   } else {
1854     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1855     IS               tis;
1856     PetscInt         schur_size;
1857 
1858     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1859     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1860     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1861     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1862     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1863       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1864       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1865       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1874 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1875 {
1876   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1877   PC_IS          *pcis = (PC_IS*)pc->data;
1878   PC             pc_temp;
1879   Mat            A_RR;
1880   MatReuse       reuse;
1881   PetscScalar    m_one = -1.0;
1882   PetscReal      value;
1883   PetscInt       n_D,n_R;
1884   PetscBool      use_exact,use_exact_reduced,issbaij;
1885   PetscErrorCode ierr;
1886   /* prefixes stuff */
1887   char           dir_prefix[256],neu_prefix[256],str_level[16];
1888   size_t         len;
1889 
1890   PetscFunctionBegin;
1891 
1892   /* compute prefixes */
1893   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1894   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1895   if (!pcbddc->current_level) {
1896     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1897     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1898     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1899     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1900   } else {
1901     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1902     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1903     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1904     len -= 15; /* remove "pc_bddc_coarse_" */
1905     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1906     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1907     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1908     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1909     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1910     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1911     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1912     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1913   }
1914 
1915   /* DIRICHLET PROBLEM */
1916   if (dirichlet) {
1917     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1918     if (pcbddc->local_mat->symmetric_set) {
1919       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1920     }
1921     /* Matrix for Dirichlet problem is pcis->A_II */
1922     n_D = pcis->n - pcis->n_B;
1923     if (!pcbddc->ksp_D) { /* create object if not yet build */
1924       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1925       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1926       /* default */
1927       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1928       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1929       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1930       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1931       if (issbaij) {
1932         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1933       } else {
1934         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1935       }
1936       /* Allow user's customization */
1937       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1938       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1939     }
1940     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1941     if (sub_schurs->reuse_mumps) {
1942       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1943 
1944       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1945     }
1946     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1947     if (!n_D) {
1948       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1949       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1950     }
1951     /* Set Up KSP for Dirichlet problem of BDDC */
1952     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1953     /* set ksp_D into pcis data */
1954     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1955     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1956     pcis->ksp_D = pcbddc->ksp_D;
1957   }
1958 
1959   /* NEUMANN PROBLEM */
1960   A_RR = 0;
1961   if (neumann) {
1962     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1963     PetscInt        ibs,mbs;
1964     PetscBool       issbaij;
1965     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1966     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1967     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1968     if (pcbddc->ksp_R) { /* already created ksp */
1969       PetscInt nn_R;
1970       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1971       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1972       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1973       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1974         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1975         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1976         reuse = MAT_INITIAL_MATRIX;
1977       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1978         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1979           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1980           reuse = MAT_INITIAL_MATRIX;
1981         } else { /* safe to reuse the matrix */
1982           reuse = MAT_REUSE_MATRIX;
1983         }
1984       }
1985       /* last check */
1986       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1987         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1988         reuse = MAT_INITIAL_MATRIX;
1989       }
1990     } else { /* first time, so we need to create the matrix */
1991       reuse = MAT_INITIAL_MATRIX;
1992     }
1993     /* extract A_RR */
1994     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1995     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1996     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1997     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1998       if (matis->A == pcbddc->local_mat) {
1999         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2000         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2001       } else {
2002         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2003       }
2004     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
2005       if (matis->A == pcbddc->local_mat) {
2006         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
2007         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2008       } else {
2009         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
2010       }
2011     }
2012     if (!sub_schurs->reuse_mumps) {
2013       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
2014       if (pcbddc->local_mat->symmetric_set) {
2015         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
2016       }
2017     } else {
2018       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2019 
2020       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2021       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
2022       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
2023     }
2024     if (!pcbddc->ksp_R) { /* create object if not present */
2025       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
2026       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
2027       /* default */
2028       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
2029       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
2030       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2031       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
2032       if (issbaij) {
2033         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
2034       } else {
2035         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
2036       }
2037       /* Allow user's customization */
2038       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
2039       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2040     }
2041     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
2042     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
2043     if (!n_R) {
2044       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
2045       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
2046     }
2047     /* Reuse MUMPS solver if it is present */
2048     if (sub_schurs->reuse_mumps) {
2049       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2050 
2051       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
2052     }
2053     /* Set Up KSP for Neumann problem of BDDC */
2054     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
2055   }
2056   /* free Neumann problem's matrix */
2057   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
2058 
2059   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
2060   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
2061     if (pcbddc->dbg_flag) {
2062       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2063       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2064       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2065     }
2066     if (dirichlet) { /* Dirichlet */
2067       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
2068       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
2069       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
2070       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
2071       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
2072       /* need to be adapted? */
2073       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2074       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2075       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
2076       /* print info */
2077       if (pcbddc->dbg_flag) {
2078         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
2079         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2080       }
2081       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
2082         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
2083       }
2084     }
2085     if (neumann) { /* Neumann */
2086       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
2087       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
2088       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
2089       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
2090       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
2091       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
2092       /* need to be adapted? */
2093       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
2094       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2095       /* print info */
2096       if (pcbddc->dbg_flag) {
2097         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
2098         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2099       }
2100       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
2101         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
2102       }
2103     }
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
2110 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
2111 {
2112   PetscErrorCode  ierr;
2113   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2114   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2115 
2116   PetscFunctionBegin;
2117   if (!sub_schurs->reuse_mumps) {
2118     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
2119   }
2120   if (!pcbddc->switch_static) {
2121     if (applytranspose && pcbddc->local_auxmat1) {
2122       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2123       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2124     }
2125     if (!sub_schurs->reuse_mumps) {
2126       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2127       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2128     } else {
2129       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2130 
2131       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2132       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2133     }
2134   } else {
2135     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2136     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2137     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2138     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2139     if (applytranspose && pcbddc->local_auxmat1) {
2140       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
2141       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2142       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2143       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2144     }
2145   }
2146   if (!sub_schurs->reuse_mumps) {
2147     if (applytranspose) {
2148       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2149     } else {
2150       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2151     }
2152 #if defined(PETSC_HAVE_MUMPS)
2153   } else {
2154     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2155 
2156     if (applytranspose) {
2157       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2158     } else {
2159       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
2160     }
2161 #endif
2162   }
2163   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
2164   if (!pcbddc->switch_static) {
2165     if (!sub_schurs->reuse_mumps) {
2166       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2167       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2168     } else {
2169       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
2170 
2171       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2172       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2173     }
2174     if (!applytranspose && pcbddc->local_auxmat1) {
2175       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2176       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
2177     }
2178   } else {
2179     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2180     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2181     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2182     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183     if (!applytranspose && pcbddc->local_auxmat1) {
2184       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
2185       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
2186     }
2187     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2188     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2189     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2190     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
2196 #undef __FUNCT__
2197 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
2198 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
2199 {
2200   PetscErrorCode ierr;
2201   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
2202   PC_IS*            pcis = (PC_IS*)  (pc->data);
2203   const PetscScalar zero = 0.0;
2204 
2205   PetscFunctionBegin;
2206   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
2207   if (applytranspose) {
2208     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2209     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2210   } else {
2211     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2212     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2213   }
2214 
2215   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2216   if (pcbddc->benign_p0_lidx >= 0) {
2217     PetscScalar *array;
2218 
2219     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2220     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2221     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2222   }
2223 
2224   /* start communications from local primal nodes to rhs of coarse solver */
2225   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2226   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2227   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2228 
2229   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2230   /* TODO remove null space when doing multilevel */
2231   if (pcbddc->coarse_ksp) {
2232     Mat coarse_mat;
2233     Vec rhs,sol;
2234     MatNullSpace nullsp;
2235 
2236     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2237     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2238     ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
2239     ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr);
2240     if (nullsp) {
2241       ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr);
2242     }
2243     if (applytranspose) {
2244       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2245     } else {
2246       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2247     }
2248     if (nullsp) {
2249       ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr);
2250     }
2251   }
2252 
2253   /* Local solution on R nodes */
2254   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2255     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2256   }
2257 
2258   /* communications from coarse sol to local primal nodes */
2259   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2260   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2261 
2262   /* Sum contributions from two levels */
2263   if (applytranspose) {
2264     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2265     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2266   } else {
2267     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2268     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2269   }
2270   /* store p0 */
2271   if (pcbddc->benign_p0_lidx >= 0) {
2272     PetscScalar *array;
2273 
2274     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2275     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2276     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2277   }
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2283 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2284 {
2285   PetscErrorCode ierr;
2286   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2287   PetscScalar    *array;
2288   Vec            from,to;
2289 
2290   PetscFunctionBegin;
2291   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2292     from = pcbddc->coarse_vec;
2293     to = pcbddc->vec1_P;
2294     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2295       Vec tvec;
2296 
2297       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2298       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2299       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2300       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2301       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2302       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2303     }
2304   } else { /* from local to global -> put data in coarse right hand side */
2305     from = pcbddc->vec1_P;
2306     to = pcbddc->coarse_vec;
2307   }
2308   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2309   PetscFunctionReturn(0);
2310 }
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2314 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2315 {
2316   PetscErrorCode ierr;
2317   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2318   PetscScalar    *array;
2319   Vec            from,to;
2320 
2321   PetscFunctionBegin;
2322   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2323     from = pcbddc->coarse_vec;
2324     to = pcbddc->vec1_P;
2325   } else { /* from local to global -> put data in coarse right hand side */
2326     from = pcbddc->vec1_P;
2327     to = pcbddc->coarse_vec;
2328   }
2329   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2330   if (smode == SCATTER_FORWARD) {
2331     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2332       Vec tvec;
2333 
2334       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2335       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2336       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2337       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2338     }
2339   } else {
2340     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2341      ierr = VecResetArray(from);CHKERRQ(ierr);
2342     }
2343   }
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 /* uncomment for testing purposes */
2348 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2349 #undef __FUNCT__
2350 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2351 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2352 {
2353   PetscErrorCode    ierr;
2354   PC_IS*            pcis = (PC_IS*)(pc->data);
2355   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2356   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2357   /* one and zero */
2358   PetscScalar       one=1.0,zero=0.0;
2359   /* space to store constraints and their local indices */
2360   PetscScalar       *constraints_data;
2361   PetscInt          *constraints_idxs,*constraints_idxs_B;
2362   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2363   PetscInt          *constraints_n;
2364   /* iterators */
2365   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2366   /* BLAS integers */
2367   PetscBLASInt      lwork,lierr;
2368   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2369   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2370   /* reuse */
2371   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2372   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2373   /* change of basis */
2374   PetscBool         qr_needed;
2375   PetscBT           change_basis,qr_needed_idx;
2376   /* auxiliary stuff */
2377   PetscInt          *nnz,*is_indices;
2378   PetscInt          ncc;
2379   /* some quantities */
2380   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2381   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2382 
2383   PetscFunctionBegin;
2384   /* Destroy Mat objects computed previously */
2385   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2386   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2387   /* save info on constraints from previous setup (if any) */
2388   olocal_primal_size = pcbddc->local_primal_size;
2389   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2390   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2391   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2392   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2393   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2394   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2395 
2396   /* print some info */
2397   if (pcbddc->dbg_flag) {
2398     IS       vertices;
2399     PetscInt nv,nedges,nfaces;
2400     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2401     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2402     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2403     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2404     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2405     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2406     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2407     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2408     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2409   }
2410 
2411   if (!pcbddc->adaptive_selection) {
2412     IS           ISForVertices,*ISForFaces,*ISForEdges;
2413     MatNullSpace nearnullsp;
2414     const Vec    *nearnullvecs;
2415     Vec          *localnearnullsp;
2416     PetscScalar  *array;
2417     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2418     PetscBool    nnsp_has_cnst;
2419     /* LAPACK working arrays for SVD or POD */
2420     PetscBool    skip_lapack,boolforchange;
2421     PetscScalar  *work;
2422     PetscReal    *singular_vals;
2423 #if defined(PETSC_USE_COMPLEX)
2424     PetscReal    *rwork;
2425 #endif
2426 #if defined(PETSC_MISSING_LAPACK_GESVD)
2427     PetscScalar  *temp_basis,*correlation_mat;
2428 #else
2429     PetscBLASInt dummy_int=1;
2430     PetscScalar  dummy_scalar=1.;
2431 #endif
2432 
2433     /* Get index sets for faces, edges and vertices from graph */
2434     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2435     /* free unneeded index sets */
2436     if (!pcbddc->use_vertices) {
2437       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2438     }
2439     if (!pcbddc->use_edges) {
2440       for (i=0;i<n_ISForEdges;i++) {
2441         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2442       }
2443       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2444       n_ISForEdges = 0;
2445     }
2446     if (!pcbddc->use_faces) {
2447       for (i=0;i<n_ISForFaces;i++) {
2448         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2449       }
2450       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2451       n_ISForFaces = 0;
2452     }
2453 
2454 #if defined(PETSC_USE_DEBUG)
2455     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2456        Also use_change_of_basis should be consistent among processors */
2457     if (pcbddc->NullSpace) {
2458       PetscBool tbool[2],gbool[2];
2459 
2460       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2461         pcbddc->use_change_of_basis = PETSC_TRUE;
2462         if (!ISForEdges) {
2463           pcbddc->use_change_on_faces = PETSC_TRUE;
2464         }
2465       }
2466       tbool[0] = pcbddc->use_change_of_basis;
2467       tbool[1] = pcbddc->use_change_on_faces;
2468       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2469       pcbddc->use_change_of_basis = gbool[0];
2470       pcbddc->use_change_on_faces = gbool[1];
2471     }
2472 #endif
2473 
2474     /* check if near null space is attached to global mat */
2475     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2476     if (nearnullsp) {
2477       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2478       /* remove any stored info */
2479       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2480       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2481       /* store information for BDDC solver reuse */
2482       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2483       pcbddc->onearnullspace = nearnullsp;
2484       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2485       for (i=0;i<nnsp_size;i++) {
2486         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2487       }
2488     } else { /* if near null space is not provided BDDC uses constants by default */
2489       nnsp_size = 0;
2490       nnsp_has_cnst = PETSC_TRUE;
2491     }
2492     /* get max number of constraints on a single cc */
2493     max_constraints = nnsp_size;
2494     if (nnsp_has_cnst) max_constraints++;
2495 
2496     /*
2497          Evaluate maximum storage size needed by the procedure
2498          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2499          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2500          There can be multiple constraints per connected component
2501                                                                                                                                                            */
2502     n_vertices = 0;
2503     if (ISForVertices) {
2504       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2505     }
2506     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2507     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2508 
2509     total_counts = n_ISForFaces+n_ISForEdges;
2510     total_counts *= max_constraints;
2511     total_counts += n_vertices;
2512     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2513 
2514     total_counts = 0;
2515     max_size_of_constraint = 0;
2516     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2517       IS used_is;
2518       if (i<n_ISForEdges) {
2519         used_is = ISForEdges[i];
2520       } else {
2521         used_is = ISForFaces[i-n_ISForEdges];
2522       }
2523       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2524       total_counts += j;
2525       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2526     }
2527     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2528 
2529     /* get local part of global near null space vectors */
2530     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2531     for (k=0;k<nnsp_size;k++) {
2532       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2533       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2534       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2535     }
2536 
2537     /* whether or not to skip lapack calls */
2538     skip_lapack = PETSC_TRUE;
2539     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2540 
2541     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2542     if (!skip_lapack) {
2543       PetscScalar temp_work;
2544 
2545 #if defined(PETSC_MISSING_LAPACK_GESVD)
2546       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2547       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2548       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2549       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2550 #if defined(PETSC_USE_COMPLEX)
2551       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2552 #endif
2553       /* now we evaluate the optimal workspace using query with lwork=-1 */
2554       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2555       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2556       lwork = -1;
2557       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2558 #if !defined(PETSC_USE_COMPLEX)
2559       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2560 #else
2561       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2562 #endif
2563       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2564       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2565 #else /* on missing GESVD */
2566       /* SVD */
2567       PetscInt max_n,min_n;
2568       max_n = max_size_of_constraint;
2569       min_n = max_constraints;
2570       if (max_size_of_constraint < max_constraints) {
2571         min_n = max_size_of_constraint;
2572         max_n = max_constraints;
2573       }
2574       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2575 #if defined(PETSC_USE_COMPLEX)
2576       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2577 #endif
2578       /* now we evaluate the optimal workspace using query with lwork=-1 */
2579       lwork = -1;
2580       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2581       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2582       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2583       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2584 #if !defined(PETSC_USE_COMPLEX)
2585       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2586 #else
2587       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2588 #endif
2589       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2590       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2591 #endif /* on missing GESVD */
2592       /* Allocate optimal workspace */
2593       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2594       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2595     }
2596     /* Now we can loop on constraining sets */
2597     total_counts = 0;
2598     constraints_idxs_ptr[0] = 0;
2599     constraints_data_ptr[0] = 0;
2600     /* vertices */
2601     if (n_vertices) {
2602       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2603       ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2604       for (i=0;i<n_vertices;i++) {
2605         constraints_n[total_counts] = 1;
2606         constraints_data[total_counts] = 1.0;
2607         constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2608         constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2609         total_counts++;
2610       }
2611       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2612       n_vertices = total_counts;
2613     }
2614 
2615     /* edges and faces */
2616     total_counts_cc = total_counts;
2617     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2618       IS        used_is;
2619       PetscBool idxs_copied = PETSC_FALSE;
2620 
2621       if (ncc<n_ISForEdges) {
2622         used_is = ISForEdges[ncc];
2623         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2624       } else {
2625         used_is = ISForFaces[ncc-n_ISForEdges];
2626         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2627       }
2628       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2629 
2630       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2631       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2632       /* change of basis should not be performed on local periodic nodes */
2633       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2634       if (nnsp_has_cnst) {
2635         PetscScalar quad_value;
2636 
2637         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2638         idxs_copied = PETSC_TRUE;
2639 
2640         if (!pcbddc->use_nnsp_true) {
2641           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2642         } else {
2643           quad_value = 1.0;
2644         }
2645         for (j=0;j<size_of_constraint;j++) {
2646           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2647         }
2648         temp_constraints++;
2649         total_counts++;
2650       }
2651       for (k=0;k<nnsp_size;k++) {
2652         PetscReal real_value;
2653         PetscScalar *ptr_to_data;
2654 
2655         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2656         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2657         for (j=0;j<size_of_constraint;j++) {
2658           ptr_to_data[j] = array[is_indices[j]];
2659         }
2660         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2661         /* check if array is null on the connected component */
2662         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2663         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2664         if (real_value > 0.0) { /* keep indices and values */
2665           temp_constraints++;
2666           total_counts++;
2667           if (!idxs_copied) {
2668             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2669             idxs_copied = PETSC_TRUE;
2670           }
2671         }
2672       }
2673       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2674       valid_constraints = temp_constraints;
2675       if (!pcbddc->use_nnsp_true && temp_constraints) {
2676         if (temp_constraints == 1) { /* just normalize the constraint */
2677           PetscScalar norm,*ptr_to_data;
2678 
2679           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2680           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2681           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2682           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2683           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2684         } else { /* perform SVD */
2685           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2686           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2687 
2688 #if defined(PETSC_MISSING_LAPACK_GESVD)
2689           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2690              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2691              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2692                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2693                 from that computed using LAPACKgesvd
2694              -> This is due to a different computation of eigenvectors in LAPACKheev
2695              -> The quality of the POD-computed basis will be the same */
2696           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2697           /* Store upper triangular part of correlation matrix */
2698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2699           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2700           for (j=0;j<temp_constraints;j++) {
2701             for (k=0;k<j+1;k++) {
2702               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2703             }
2704           }
2705           /* compute eigenvalues and eigenvectors of correlation matrix */
2706           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2707           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2708 #if !defined(PETSC_USE_COMPLEX)
2709           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2710 #else
2711           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2712 #endif
2713           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2714           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2715           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2716           j = 0;
2717           while (j < temp_constraints && singular_vals[j] < tol) j++;
2718           total_counts = total_counts-j;
2719           valid_constraints = temp_constraints-j;
2720           /* scale and copy POD basis into used quadrature memory */
2721           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2722           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2723           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2724           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2725           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2726           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2727           if (j<temp_constraints) {
2728             PetscInt ii;
2729             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2730             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2731             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2732             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2733             for (k=0;k<temp_constraints-j;k++) {
2734               for (ii=0;ii<size_of_constraint;ii++) {
2735                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2736               }
2737             }
2738           }
2739 #else  /* on missing GESVD */
2740           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2741           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2742           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2743           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2744 #if !defined(PETSC_USE_COMPLEX)
2745           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2746 #else
2747           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2748 #endif
2749           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2750           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2751           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2752           k = temp_constraints;
2753           if (k > size_of_constraint) k = size_of_constraint;
2754           j = 0;
2755           while (j < k && singular_vals[k-j-1] < tol) j++;
2756           valid_constraints = k-j;
2757           total_counts = total_counts-temp_constraints+valid_constraints;
2758 #endif /* on missing GESVD */
2759         }
2760       }
2761       /* update pointers information */
2762       if (valid_constraints) {
2763         constraints_n[total_counts_cc] = valid_constraints;
2764         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2765         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2766         /* set change_of_basis flag */
2767         if (boolforchange) {
2768           PetscBTSet(change_basis,total_counts_cc);
2769         }
2770         total_counts_cc++;
2771       }
2772     }
2773     /* free workspace */
2774     if (!skip_lapack) {
2775       ierr = PetscFree(work);CHKERRQ(ierr);
2776 #if defined(PETSC_USE_COMPLEX)
2777       ierr = PetscFree(rwork);CHKERRQ(ierr);
2778 #endif
2779       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2780 #if defined(PETSC_MISSING_LAPACK_GESVD)
2781       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2782       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2783 #endif
2784     }
2785     for (k=0;k<nnsp_size;k++) {
2786       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2787     }
2788     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2789     /* free index sets of faces, edges and vertices */
2790     for (i=0;i<n_ISForFaces;i++) {
2791       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2792     }
2793     if (n_ISForFaces) {
2794       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2795     }
2796     for (i=0;i<n_ISForEdges;i++) {
2797       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2798     }
2799     if (n_ISForEdges) {
2800       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2801     }
2802     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2803   } else {
2804     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2805 
2806     total_counts = 0;
2807     n_vertices = 0;
2808     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2809       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2810     }
2811     max_constraints = 0;
2812     total_counts_cc = 0;
2813     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2814       total_counts += pcbddc->adaptive_constraints_n[i];
2815       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2816       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2817     }
2818     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2819     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2820     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2821     constraints_data = pcbddc->adaptive_constraints_data;
2822     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2823     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2824     total_counts_cc = 0;
2825     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2826       if (pcbddc->adaptive_constraints_n[i]) {
2827         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2828       }
2829     }
2830 #if 0
2831     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2832     for (i=0;i<total_counts_cc;i++) {
2833       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2834       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2835       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2836         printf(" %d",constraints_idxs[j]);
2837       }
2838       printf("\n");
2839       printf("number of cc: %d\n",constraints_n[i]);
2840     }
2841     for (i=0;i<n_vertices;i++) {
2842       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2843     }
2844     for (i=0;i<sub_schurs->n_subs;i++) {
2845       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2846     }
2847 #endif
2848 
2849     max_size_of_constraint = 0;
2850     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2851     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2852     /* Change of basis */
2853     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2854     if (pcbddc->use_change_of_basis) {
2855       for (i=0;i<sub_schurs->n_subs;i++) {
2856         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2857           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2858         }
2859       }
2860     }
2861   }
2862   pcbddc->local_primal_size = total_counts;
2863   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2864   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2865 
2866   /* map constraints_idxs in boundary numbering */
2867   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2868   if (i != constraints_idxs_ptr[total_counts_cc]) {
2869     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2870   }
2871 
2872   /* Create constraint matrix */
2873   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2874   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2875   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2876 
2877   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2878   /* determine if a QR strategy is needed for change of basis */
2879   qr_needed = PETSC_FALSE;
2880   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2881   total_primal_vertices=0;
2882   pcbddc->local_primal_size_cc = 0;
2883   for (i=0;i<total_counts_cc;i++) {
2884     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2885     if (size_of_constraint == 1) {
2886       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2887       pcbddc->local_primal_size_cc += 1;
2888     } else if (PetscBTLookup(change_basis,i)) {
2889       for (k=0;k<constraints_n[i];k++) {
2890         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2891       }
2892       pcbddc->local_primal_size_cc += constraints_n[i];
2893       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2894         PetscBTSet(qr_needed_idx,i);
2895         qr_needed = PETSC_TRUE;
2896       }
2897     } else {
2898       pcbddc->local_primal_size_cc += 1;
2899     }
2900   }
2901   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2902   pcbddc->n_vertices = total_primal_vertices;
2903   /* permute indices in order to have a sorted set of vertices */
2904   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2905 
2906   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2907   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2908   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2909   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2910 
2911   /* nonzero structure of constraint matrix */
2912   /* and get reference dof for local constraints */
2913   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2914   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2915 
2916   j = total_primal_vertices;
2917   total_counts = total_primal_vertices;
2918   cum = total_primal_vertices;
2919   for (i=n_vertices;i<total_counts_cc;i++) {
2920     if (!PetscBTLookup(change_basis,i)) {
2921       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2922       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2923       cum++;
2924       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2925       for (k=0;k<constraints_n[i];k++) {
2926         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2927         nnz[j+k] = size_of_constraint;
2928       }
2929       j += constraints_n[i];
2930     }
2931   }
2932   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2933   ierr = PetscFree(nnz);CHKERRQ(ierr);
2934 
2935   /* set values in constraint matrix */
2936   for (i=0;i<total_primal_vertices;i++) {
2937     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2938   }
2939   total_counts = total_primal_vertices;
2940   for (i=n_vertices;i<total_counts_cc;i++) {
2941     if (!PetscBTLookup(change_basis,i)) {
2942       PetscInt *cols;
2943 
2944       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2945       cols = constraints_idxs+constraints_idxs_ptr[i];
2946       for (k=0;k<constraints_n[i];k++) {
2947         PetscInt    row = total_counts+k;
2948         PetscScalar *vals;
2949 
2950         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2951         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2952       }
2953       total_counts += constraints_n[i];
2954     }
2955   }
2956   /* assembling */
2957   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959 
2960   /*
2961   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2962   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2963   */
2964   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2965   if (pcbddc->use_change_of_basis) {
2966     /* dual and primal dofs on a single cc */
2967     PetscInt     dual_dofs,primal_dofs;
2968     /* working stuff for GEQRF */
2969     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2970     PetscBLASInt lqr_work;
2971     /* working stuff for UNGQR */
2972     PetscScalar  *gqr_work,lgqr_work_t;
2973     PetscBLASInt lgqr_work;
2974     /* working stuff for TRTRS */
2975     PetscScalar  *trs_rhs;
2976     PetscBLASInt Blas_NRHS;
2977     /* pointers for values insertion into change of basis matrix */
2978     PetscInt     *start_rows,*start_cols;
2979     PetscScalar  *start_vals;
2980     /* working stuff for values insertion */
2981     PetscBT      is_primal;
2982     PetscInt     *aux_primal_numbering_B;
2983     /* matrix sizes */
2984     PetscInt     global_size,local_size;
2985     /* temporary change of basis */
2986     Mat          localChangeOfBasisMatrix;
2987     /* extra space for debugging */
2988     PetscScalar  *dbg_work;
2989 
2990     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2991     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2992     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2993     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2994     /* nonzeros for local mat */
2995     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2996     for (i=0;i<pcis->n;i++) nnz[i]=1;
2997     for (i=n_vertices;i<total_counts_cc;i++) {
2998       if (PetscBTLookup(change_basis,i)) {
2999         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
3000         if (PetscBTLookup(qr_needed_idx,i)) {
3001           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
3002         } else {
3003           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
3004           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
3005         }
3006       }
3007     }
3008     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
3009     ierr = PetscFree(nnz);CHKERRQ(ierr);
3010     /* Set initial identity in the matrix */
3011     for (i=0;i<pcis->n;i++) {
3012       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
3013     }
3014 
3015     if (pcbddc->dbg_flag) {
3016       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
3017       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3018     }
3019 
3020 
3021     /* Now we loop on the constraints which need a change of basis */
3022     /*
3023        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
3024        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
3025 
3026        Basic blocks of change of basis matrix T computed by
3027 
3028           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
3029 
3030             | 1        0   ...        0         s_1/S |
3031             | 0        1   ...        0         s_2/S |
3032             |              ...                        |
3033             | 0        ...            1     s_{n-1}/S |
3034             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
3035 
3036             with S = \sum_{i=1}^n s_i^2
3037             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
3038                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
3039 
3040           - QR decomposition of constraints otherwise
3041     */
3042     if (qr_needed) {
3043       /* space to store Q */
3044       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
3045       /* first we issue queries for optimal work */
3046       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3047       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
3048       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3049       lqr_work = -1;
3050       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
3051       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
3052       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
3053       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
3054       lgqr_work = -1;
3055       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
3056       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
3057       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
3058       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3059       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
3060       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
3061       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
3062       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
3063       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
3064       /* array to store scaling factors for reflectors */
3065       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
3066       /* array to store rhs and solution of triangular solver */
3067       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
3068       /* allocating workspace for check */
3069       if (pcbddc->dbg_flag) {
3070         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
3071       }
3072     }
3073     /* array to store whether a node is primal or not */
3074     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
3075     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
3076     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
3077     if (i != total_primal_vertices) {
3078       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
3079     }
3080     for (i=0;i<total_primal_vertices;i++) {
3081       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
3082     }
3083     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
3084 
3085     /* loop on constraints and see whether or not they need a change of basis and compute it */
3086     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
3087       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
3088       if (PetscBTLookup(change_basis,total_counts)) {
3089         /* get constraint info */
3090         primal_dofs = constraints_n[total_counts];
3091         dual_dofs = size_of_constraint-primal_dofs;
3092 
3093         if (pcbddc->dbg_flag) {
3094           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
3095         }
3096 
3097         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
3098 
3099           /* copy quadrature constraints for change of basis check */
3100           if (pcbddc->dbg_flag) {
3101             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3102           }
3103           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
3104           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3105 
3106           /* compute QR decomposition of constraints */
3107           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3108           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3109           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3110           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3111           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
3112           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
3113           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3114 
3115           /* explictly compute R^-T */
3116           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
3117           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
3118           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3119           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
3120           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3121           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3122           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3123           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
3124           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
3125           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3126 
3127           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
3128           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3129           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3130           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3131           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3132           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3133           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
3134           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
3135           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3136 
3137           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
3138              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
3139              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
3140           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
3141           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
3142           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
3143           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3144           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
3145           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
3146           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3147           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
3148           ierr = PetscFPTrapPop();CHKERRQ(ierr);
3149           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
3150 
3151           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
3152           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
3153           /* insert cols for primal dofs */
3154           for (j=0;j<primal_dofs;j++) {
3155             start_vals = &qr_basis[j*size_of_constraint];
3156             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3157             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3158           }
3159           /* insert cols for dual dofs */
3160           for (j=0,k=0;j<dual_dofs;k++) {
3161             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
3162               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
3163               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3164               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
3165               j++;
3166             }
3167           }
3168 
3169           /* check change of basis */
3170           if (pcbddc->dbg_flag) {
3171             PetscInt   ii,jj;
3172             PetscBool valid_qr=PETSC_TRUE;
3173             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
3174             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3175             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
3176             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
3177             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
3178             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
3179             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
3180             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
3181             ierr = PetscFPTrapPop();CHKERRQ(ierr);
3182             for (jj=0;jj<size_of_constraint;jj++) {
3183               for (ii=0;ii<primal_dofs;ii++) {
3184                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
3185                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
3186               }
3187             }
3188             if (!valid_qr) {
3189               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
3190               for (jj=0;jj<size_of_constraint;jj++) {
3191                 for (ii=0;ii<primal_dofs;ii++) {
3192                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
3193                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3194                   }
3195                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
3196                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
3197                   }
3198                 }
3199               }
3200             } else {
3201               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3202             }
3203           }
3204         } else { /* simple transformation block */
3205           PetscInt    row,col;
3206           PetscScalar val,norm;
3207 
3208           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3209           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3210           for (j=0;j<size_of_constraint;j++) {
3211             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3212             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3213             if (!PetscBTLookup(is_primal,row_B)) {
3214               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3215               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3216               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3217             } else {
3218               for (k=0;k<size_of_constraint;k++) {
3219                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3220                 if (row != col) {
3221                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3222                 } else {
3223                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3224                 }
3225                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3226               }
3227             }
3228           }
3229           if (pcbddc->dbg_flag) {
3230             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3231           }
3232         }
3233       } else {
3234         if (pcbddc->dbg_flag) {
3235           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3236         }
3237       }
3238     }
3239 
3240     /* free workspace */
3241     if (qr_needed) {
3242       if (pcbddc->dbg_flag) {
3243         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3244       }
3245       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3246       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3247       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3248       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3249       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3250     }
3251     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3252     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3253     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3254 
3255     /* assembling of global change of variable */
3256     {
3257       Mat      tmat;
3258       PetscInt bs;
3259 
3260       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3261       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3262       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3263       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3264       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3265       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3266       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3267       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3268       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3269       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3270       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3271       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3272       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3273       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3274       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3275       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3276       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3277       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3278     }
3279     /* check */
3280     if (pcbddc->dbg_flag) {
3281       PetscReal error;
3282       Vec       x,x_change;
3283 
3284       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3285       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3286       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3287       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3288       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3289       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3290       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3291       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3292       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3293       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3294       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3295       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3296       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3297       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3298       ierr = VecDestroy(&x);CHKERRQ(ierr);
3299       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3300     }
3301 
3302     /* adapt sub_schurs computed (if any) */
3303     if (pcbddc->use_deluxe_scaling) {
3304       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3305       if (sub_schurs->S_Ej_all) {
3306         Mat                    S_new,tmat;
3307         ISLocalToGlobalMapping NtoSall;
3308         IS                     is_all_N,is_V,is_V_Sall;
3309         const PetscScalar      *array;
3310         const PetscInt         *idxs_V,*idxs_all;
3311         PetscInt               i,n_V;
3312 
3313         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3314         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3315         ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr);
3316         ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr);
3317         ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr);
3318         ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr);
3319         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3320         ierr = ISDestroy(&is_V);CHKERRQ(ierr);
3321         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3322         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3323         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3324         ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3325         ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr);
3326         ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3327         ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3328         ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr);
3329         for (i=0;i<n_V;i++) {
3330           PetscScalar val;
3331           PetscInt    idx;
3332 
3333           idx = idxs_V[i];
3334           val = array[idxs_all[idxs_V[i]]];
3335           ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr);
3336         }
3337         ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3338         ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3339         sub_schurs->S_Ej_all = S_new;
3340         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3341         if (sub_schurs->sum_S_Ej_all) {
3342           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3343           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3344           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3345           ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr);
3346           sub_schurs->sum_S_Ej_all = S_new;
3347           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3348         }
3349         ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr);
3350         ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr);
3351         ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr);
3352         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3353         ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr);
3354       }
3355     }
3356     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3357   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3358     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3359     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3360   }
3361 
3362   /* set up change of basis context */
3363   if (pcbddc->ChangeOfBasisMatrix) {
3364     PCBDDCChange_ctx change_ctx;
3365 
3366     if (!pcbddc->new_global_mat) {
3367       PetscInt global_size,local_size;
3368 
3369       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3370       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3371       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3372       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3373       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3374       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3375       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3376       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3377       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3378     } else {
3379       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3380       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3381       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3382     }
3383     if (!pcbddc->user_ChangeOfBasisMatrix) {
3384       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3385       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3386     } else {
3387       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3388       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3389     }
3390     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3391     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3392     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3393     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3394   }
3395 
3396   /* add pressure dof to set of primal nodes for numbering purposes */
3397   if (pcbddc->benign_p0_lidx >= 0) {
3398     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx;
3399     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx;
3400     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3401     pcbddc->local_primal_size_cc++;
3402     pcbddc->local_primal_size++;
3403   }
3404 
3405   /* check if a new primal space has been introduced (also take into account benign trick) */
3406   pcbddc->new_primal_space_local = PETSC_TRUE;
3407   if (olocal_primal_size == pcbddc->local_primal_size) {
3408     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3409     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3410     if (!pcbddc->new_primal_space_local) {
3411       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3412       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3413     }
3414   }
3415   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3416   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3417   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3418 
3419   /* flush dbg viewer */
3420   if (pcbddc->dbg_flag) {
3421     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3422   }
3423 
3424   /* free workspace */
3425   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3426   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3427   if (!pcbddc->adaptive_selection) {
3428     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3429     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3430   } else {
3431     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3432                       pcbddc->adaptive_constraints_idxs_ptr,
3433                       pcbddc->adaptive_constraints_data_ptr,
3434                       pcbddc->adaptive_constraints_idxs,
3435                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3436     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3437     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3438   }
3439   PetscFunctionReturn(0);
3440 }
3441 
3442 #undef __FUNCT__
3443 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3444 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3445 {
3446   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3447   PC_IS       *pcis = (PC_IS*)pc->data;
3448   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3449   PetscInt    ierr,i,vertex_size,N;
3450   PetscViewer viewer=pcbddc->dbg_viewer;
3451 
3452   PetscFunctionBegin;
3453   /* Reset previously computed graph */
3454   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3455   /* Init local Graph struct */
3456   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3457   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3458 
3459   /* Check validity of the csr graph passed in by the user */
3460   if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3461     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
3462   }
3463 
3464   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3465   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3466     PetscInt  *xadj,*adjncy;
3467     PetscInt  nvtxs;
3468     PetscBool flg_row=PETSC_FALSE;
3469 
3470     if (pcbddc->use_local_adj) {
3471 
3472       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3473       if (flg_row) {
3474         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3475         pcbddc->computed_rowadj = PETSC_TRUE;
3476       }
3477       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3478     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3479       IS                     is_dummy;
3480       ISLocalToGlobalMapping l2gmap_dummy;
3481       PetscInt               j,sum;
3482       PetscInt               *cxadj,*cadjncy;
3483       const PetscInt         *idxs;
3484       PCBDDCGraph            graph;
3485       PetscBT                is_on_boundary;
3486 
3487       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3488       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3489       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3490       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3491       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3492       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3493       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3494       if (flg_row) {
3495         graph->xadj = xadj;
3496         graph->adjncy = adjncy;
3497       }
3498       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3499       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3500       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3501 
3502       if (pcbddc->dbg_flag) {
3503         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3504         for (i=0;i<graph->ncc;i++) {
3505           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3506         }
3507       }
3508 
3509       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3510       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3511       for (i=0;i<pcis->n_B;i++) {
3512         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3513       }
3514       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3515 
3516       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3517       sum = 0;
3518       for (i=0;i<graph->ncc;i++) {
3519         PetscInt sizecc = 0;
3520         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3521           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3522             sizecc++;
3523           }
3524         }
3525         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3526           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3527             cxadj[graph->queue[j]] = sizecc;
3528           }
3529         }
3530         sum += sizecc*sizecc;
3531       }
3532       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3533       sum = 0;
3534       for (i=0;i<pcis->n;i++) {
3535         PetscInt temp = cxadj[i];
3536         cxadj[i] = sum;
3537         sum += temp;
3538       }
3539       cxadj[pcis->n] = sum;
3540       for (i=0;i<graph->ncc;i++) {
3541         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3542           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3543             PetscInt k,sizecc = 0;
3544             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3545               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3546                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3547                 sizecc++;
3548               }
3549             }
3550           }
3551         }
3552       }
3553       if (sum) {
3554         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3555       } else {
3556         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3557         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3558       }
3559       graph->xadj = 0;
3560       graph->adjncy = 0;
3561       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3562       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3563     }
3564   }
3565   if (pcbddc->dbg_flag) {
3566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3567   }
3568 
3569   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3570   vertex_size = 1;
3571   if (pcbddc->user_provided_isfordofs) {
3572     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3573       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3574       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3575         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3576         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3577       }
3578       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3579       pcbddc->n_ISForDofs = 0;
3580       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3581     }
3582     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3583     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3584   } else {
3585     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3586       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3587       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3588       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3589         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3590       }
3591     }
3592   }
3593 
3594   /* Setup of Graph */
3595   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3596     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3597   }
3598   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3599     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3600   }
3601   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3602 
3603   /* Graph's connected components analysis */
3604   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3605 
3606   /* print some info to stdout */
3607   if (pcbddc->dbg_flag) {
3608     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3609   }
3610 
3611   /* mark topography has done */
3612   pcbddc->recompute_topography = PETSC_FALSE;
3613   PetscFunctionReturn(0);
3614 }
3615 
3616 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3617 #undef __FUNCT__
3618 #define __FUNCT__ "PCBDDCSubsetNumbering"
3619 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3620 {
3621   PetscSF        sf;
3622   PetscLayout    map;
3623   const PetscInt *idxs;
3624   PetscInt       *leaf_data,*root_data,*gidxs;
3625   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3626   PetscInt       n_n,nlocals,start,first_index;
3627   PetscMPIInt    commsize;
3628   PetscBool      first_found;
3629   PetscErrorCode ierr;
3630 
3631   PetscFunctionBegin;
3632   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3633   if (subset_mult) {
3634     PetscCheckSameComm(subset,1,subset_mult,2);
3635     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3636     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3637   }
3638   /* create workspace layout for computing global indices of subset */
3639   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3640   lbounds[0] = lbounds[1] = 0;
3641   for (i=0;i<n;i++) {
3642     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3643     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3644   }
3645   lbounds[0] = -lbounds[0];
3646   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3647   gbounds[0] = -gbounds[0];
3648   N = gbounds[1] - gbounds[0] + 1;
3649   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3650   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3651   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3652   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3653   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3654 
3655   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3656   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3657   if (subset_mult) {
3658     const PetscInt* idxs_mult;
3659 
3660     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3661     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3662     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3663   } else {
3664     for (i=0;i<n;i++) leaf_data[i] = 1;
3665   }
3666   /* local size of new subset */
3667   n_n = 0;
3668   for (i=0;i<n;i++) n_n += leaf_data[i];
3669 
3670   /* global indexes in layout */
3671   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3672   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3673   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3674   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3675   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3676   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3677 
3678   /* reduce from leaves to roots */
3679   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3680   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3681   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3682 
3683   /* count indexes in local part of layout */
3684   nlocals = 0;
3685   first_index = -1;
3686   first_found = PETSC_FALSE;
3687   for (i=0;i<Nl;i++) {
3688     if (!first_found && root_data[i]) {
3689       first_found = PETSC_TRUE;
3690       first_index = i;
3691     }
3692     nlocals += root_data[i];
3693   }
3694 
3695   /* cumulative of number of indexes and size of subset without holes */
3696 #if defined(PETSC_HAVE_MPI_EXSCAN)
3697   start = 0;
3698   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3699 #else
3700   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3701   start = start-nlocals;
3702 #endif
3703 
3704   if (N_n) { /* compute total size of new subset if requested */
3705     *N_n = start + nlocals;
3706     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3707     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3708   }
3709 
3710   /* adapt root data with cumulative */
3711   if (first_found) {
3712     PetscInt old_index;
3713 
3714     root_data[first_index] += start;
3715     old_index = first_index;
3716     for (i=first_index+1;i<Nl;i++) {
3717       if (root_data[i]) {
3718         root_data[i] += root_data[old_index];
3719         old_index = i;
3720       }
3721     }
3722   }
3723 
3724   /* from roots to leaves */
3725   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3726   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3727   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3728 
3729   /* create new IS with global indexes without holes */
3730   if (subset_mult) {
3731     const PetscInt* idxs_mult;
3732     PetscInt        cum;
3733 
3734     cum = 0;
3735     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3736     for (i=0;i<n;i++) {
3737       PetscInt j;
3738       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3739     }
3740     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3741   } else {
3742     for (i=0;i<n;i++) {
3743       gidxs[i] = leaf_data[i]-1;
3744     }
3745   }
3746   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3747   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3748   PetscFunctionReturn(0);
3749 }
3750 
3751 #undef __FUNCT__
3752 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3753 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3754 {
3755   PetscInt       i,j;
3756   PetscScalar    *alphas;
3757   PetscErrorCode ierr;
3758 
3759   PetscFunctionBegin;
3760   /* this implements stabilized Gram-Schmidt */
3761   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3762   for (i=0;i<n;i++) {
3763     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3764     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3765     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3766   }
3767   ierr = PetscFree(alphas);CHKERRQ(ierr);
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 #undef __FUNCT__
3772 #define __FUNCT__ "MatISGetSubassemblingPattern"
3773 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3774 {
3775   IS             ranks_send_to;
3776   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3777   PetscMPIInt    size,rank,color;
3778   PetscInt       *xadj,*adjncy;
3779   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3780   PetscInt       i,local_size,threshold=0;
3781   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3782   PetscSubcomm   subcomm;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3787   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3788   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3789 
3790   /* Get info on mapping */
3791   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3792   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3793 
3794   /* build local CSR graph of subdomains' connectivity */
3795   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3796   xadj[0] = 0;
3797   xadj[1] = PetscMax(n_neighs-1,0);
3798   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3799   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3800 
3801   if (threshold) {
3802     PetscInt xadj_count = 0;
3803     for (i=1;i<n_neighs;i++) {
3804       if (n_shared[i] > threshold) {
3805         adjncy[xadj_count] = neighs[i];
3806         adjncy_wgt[xadj_count] = n_shared[i];
3807         xadj_count++;
3808       }
3809     }
3810     xadj[1] = xadj_count;
3811   } else {
3812     if (xadj[1]) {
3813       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3814       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3815     }
3816   }
3817   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3818   if (use_square) {
3819     for (i=0;i<xadj[1];i++) {
3820       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3821     }
3822   }
3823   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3824 
3825   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3826 
3827   /*
3828     Restrict work on active processes only.
3829   */
3830   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3831   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3832   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3833   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3834   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3835   if (color) {
3836     ierr = PetscFree(xadj);CHKERRQ(ierr);
3837     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3838     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3839   } else {
3840     Mat             subdomain_adj;
3841     IS              new_ranks,new_ranks_contig;
3842     MatPartitioning partitioner;
3843     PetscInt        prank,rstart=0,rend=0;
3844     PetscInt        *is_indices,*oldranks;
3845     PetscBool       aggregate;
3846 
3847     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3848     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3849     prank = rank;
3850     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3851     /*
3852     for (i=0;i<size;i++) {
3853       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3854     }
3855     */
3856     for (i=0;i<xadj[1];i++) {
3857       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3858     }
3859     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3860     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3861     if (aggregate) {
3862       PetscInt    lrows,row,ncols,*cols;
3863       PetscMPIInt nrank;
3864       PetscScalar *vals;
3865 
3866       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3867       lrows = 0;
3868       if (nrank<redprocs) {
3869         lrows = size/redprocs;
3870         if (nrank<size%redprocs) lrows++;
3871       }
3872       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3873       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3874       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3875       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3876       row = nrank;
3877       ncols = xadj[1]-xadj[0];
3878       cols = adjncy;
3879       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3880       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3881       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3882       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3883       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3884       ierr = PetscFree(xadj);CHKERRQ(ierr);
3885       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3886       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3887       ierr = PetscFree(vals);CHKERRQ(ierr);
3888     } else {
3889       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3890     }
3891     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3892 
3893     /* Partition */
3894     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3895     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3896     if (use_vwgt) {
3897       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3898       v_wgt[0] = local_size;
3899       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3900     }
3901     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3902     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3903     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3904     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3905     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3906 
3907     /* renumber new_ranks to avoid "holes" in new set of processors */
3908     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3909     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3910     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3911     if (!redprocs) {
3912       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3913     } else {
3914       PetscInt    idxs[1];
3915       PetscMPIInt tag;
3916       MPI_Request *reqs;
3917 
3918       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3919       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3920       for (i=rstart;i<rend;i++) {
3921         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3922       }
3923       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3924       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3925       ierr = PetscFree(reqs);CHKERRQ(ierr);
3926       ranks_send_to_idx[0] = oldranks[idxs[0]];
3927     }
3928     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3929     /* clean up */
3930     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3931     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3932     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3933     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3934   }
3935   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3936 
3937   /* assemble parallel IS for sends */
3938   i = 1;
3939   if (color) i=0;
3940   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3941   /* get back IS */
3942   *is_sends = ranks_send_to;
3943   PetscFunctionReturn(0);
3944 }
3945 
3946 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3947 
3948 #undef __FUNCT__
3949 #define __FUNCT__ "MatISSubassemble"
3950 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3951 {
3952   Mat                    local_mat;
3953   IS                     is_sends_internal;
3954   PetscInt               rows,cols,new_local_rows;
3955   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3956   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3957   ISLocalToGlobalMapping l2gmap;
3958   PetscInt*              l2gmap_indices;
3959   const PetscInt*        is_indices;
3960   MatType                new_local_type;
3961   /* buffers */
3962   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3963   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3964   PetscInt               *recv_buffer_idxs_local;
3965   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3966   /* MPI */
3967   MPI_Comm               comm,comm_n;
3968   PetscSubcomm           subcomm;
3969   PetscMPIInt            n_sends,n_recvs,commsize;
3970   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3971   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3972   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3973   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3974   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3975   PetscErrorCode         ierr;
3976 
3977   PetscFunctionBegin;
3978   /* TODO: add missing checks */
3979   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3980   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3981   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3982   PetscValidLogicalCollectiveInt(mat,nis,7);
3983   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3984   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3985   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3986   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3987   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3988   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3989   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3990   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3991     PetscInt mrows,mcols,mnrows,mncols;
3992     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3993     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3994     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3995     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3996     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3997     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3998   }
3999   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
4000   PetscValidLogicalCollectiveInt(mat,bs,0);
4001   /* prepare IS for sending if not provided */
4002   if (!is_sends) {
4003     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
4004     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
4005   } else {
4006     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
4007     is_sends_internal = is_sends;
4008   }
4009 
4010   /* get comm */
4011   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
4012 
4013   /* compute number of sends */
4014   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
4015   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
4016 
4017   /* compute number of receives */
4018   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
4019   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
4020   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
4021   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4022   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
4023   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
4024   ierr = PetscFree(iflags);CHKERRQ(ierr);
4025 
4026   /* restrict comm if requested */
4027   subcomm = 0;
4028   destroy_mat = PETSC_FALSE;
4029   if (restrict_comm) {
4030     PetscMPIInt color,subcommsize;
4031 
4032     color = 0;
4033     if (restrict_full) {
4034       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
4035     } else {
4036       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
4037     }
4038     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
4039     subcommsize = commsize - subcommsize;
4040     /* check if reuse has been requested */
4041     if (reuse == MAT_REUSE_MATRIX) {
4042       if (*mat_n) {
4043         PetscMPIInt subcommsize2;
4044         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
4045         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
4046         comm_n = PetscObjectComm((PetscObject)*mat_n);
4047       } else {
4048         comm_n = PETSC_COMM_SELF;
4049       }
4050     } else { /* MAT_INITIAL_MATRIX */
4051       PetscMPIInt rank;
4052 
4053       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4054       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
4055       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
4056       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
4057       comm_n = PetscSubcommChild(subcomm);
4058     }
4059     /* flag to destroy *mat_n if not significative */
4060     if (color) destroy_mat = PETSC_TRUE;
4061   } else {
4062     comm_n = comm;
4063   }
4064 
4065   /* prepare send/receive buffers */
4066   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
4067   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
4068   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
4069   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
4070   if (nis) {
4071     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
4072   }
4073 
4074   /* Get data from local matrices */
4075   if (!isdense) {
4076     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
4077     /* TODO: See below some guidelines on how to prepare the local buffers */
4078     /*
4079        send_buffer_vals should contain the raw values of the local matrix
4080        send_buffer_idxs should contain:
4081        - MatType_PRIVATE type
4082        - PetscInt        size_of_l2gmap
4083        - PetscInt        global_row_indices[size_of_l2gmap]
4084        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
4085     */
4086   } else {
4087     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4088     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
4089     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
4090     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
4091     send_buffer_idxs[1] = i;
4092     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4093     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
4094     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
4095     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
4096     for (i=0;i<n_sends;i++) {
4097       ilengths_vals[is_indices[i]] = len*len;
4098       ilengths_idxs[is_indices[i]] = len+2;
4099     }
4100   }
4101   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
4102   /* additional is (if any) */
4103   if (nis) {
4104     PetscMPIInt psum;
4105     PetscInt j;
4106     for (j=0,psum=0;j<nis;j++) {
4107       PetscInt plen;
4108       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4109       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
4110       psum += len+1; /* indices + lenght */
4111     }
4112     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
4113     for (j=0,psum=0;j<nis;j++) {
4114       PetscInt plen;
4115       const PetscInt *is_array_idxs;
4116       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
4117       send_buffer_idxs_is[psum] = plen;
4118       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4119       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
4120       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
4121       psum += plen+1; /* indices + lenght */
4122     }
4123     for (i=0;i<n_sends;i++) {
4124       ilengths_idxs_is[is_indices[i]] = psum;
4125     }
4126     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
4127   }
4128 
4129   buf_size_idxs = 0;
4130   buf_size_vals = 0;
4131   buf_size_idxs_is = 0;
4132   for (i=0;i<n_recvs;i++) {
4133     buf_size_idxs += (PetscInt)olengths_idxs[i];
4134     buf_size_vals += (PetscInt)olengths_vals[i];
4135     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
4136   }
4137   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
4138   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
4139   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
4140 
4141   /* get new tags for clean communications */
4142   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
4143   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
4144   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
4145 
4146   /* allocate for requests */
4147   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
4148   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
4149   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
4150   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
4151   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
4152   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
4153 
4154   /* communications */
4155   ptr_idxs = recv_buffer_idxs;
4156   ptr_vals = recv_buffer_vals;
4157   ptr_idxs_is = recv_buffer_idxs_is;
4158   for (i=0;i<n_recvs;i++) {
4159     source_dest = onodes[i];
4160     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
4161     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
4162     ptr_idxs += olengths_idxs[i];
4163     ptr_vals += olengths_vals[i];
4164     if (nis) {
4165       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
4166       ptr_idxs_is += olengths_idxs_is[i];
4167     }
4168   }
4169   for (i=0;i<n_sends;i++) {
4170     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
4171     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
4172     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
4173     if (nis) {
4174       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
4175     }
4176   }
4177   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
4178   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
4179 
4180   /* assemble new l2g map */
4181   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4182   ptr_idxs = recv_buffer_idxs;
4183   new_local_rows = 0;
4184   for (i=0;i<n_recvs;i++) {
4185     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4186     ptr_idxs += olengths_idxs[i];
4187   }
4188   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
4189   ptr_idxs = recv_buffer_idxs;
4190   new_local_rows = 0;
4191   for (i=0;i<n_recvs;i++) {
4192     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
4193     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
4194     ptr_idxs += olengths_idxs[i];
4195   }
4196   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
4197   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
4198   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
4199 
4200   /* infer new local matrix type from received local matrices type */
4201   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
4202   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
4203   if (n_recvs) {
4204     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
4205     ptr_idxs = recv_buffer_idxs;
4206     for (i=0;i<n_recvs;i++) {
4207       if ((PetscInt)new_local_type_private != *ptr_idxs) {
4208         new_local_type_private = MATAIJ_PRIVATE;
4209         break;
4210       }
4211       ptr_idxs += olengths_idxs[i];
4212     }
4213     switch (new_local_type_private) {
4214       case MATDENSE_PRIVATE:
4215         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
4216           new_local_type = MATSEQAIJ;
4217           bs = 1;
4218         } else { /* if I receive only 1 dense matrix */
4219           new_local_type = MATSEQDENSE;
4220           bs = 1;
4221         }
4222         break;
4223       case MATAIJ_PRIVATE:
4224         new_local_type = MATSEQAIJ;
4225         bs = 1;
4226         break;
4227       case MATBAIJ_PRIVATE:
4228         new_local_type = MATSEQBAIJ;
4229         break;
4230       case MATSBAIJ_PRIVATE:
4231         new_local_type = MATSEQSBAIJ;
4232         break;
4233       default:
4234         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4235         break;
4236     }
4237   } else { /* by default, new_local_type is seqdense */
4238     new_local_type = MATSEQDENSE;
4239     bs = 1;
4240   }
4241 
4242   /* create MATIS object if needed */
4243   if (reuse == MAT_INITIAL_MATRIX) {
4244     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4245     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4246   } else {
4247     /* it also destroys the local matrices */
4248     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4249   }
4250   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4251   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4252 
4253   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4254 
4255   /* Global to local map of received indices */
4256   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4257   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4258   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4259 
4260   /* restore attributes -> type of incoming data and its size */
4261   buf_size_idxs = 0;
4262   for (i=0;i<n_recvs;i++) {
4263     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4264     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4265     buf_size_idxs += (PetscInt)olengths_idxs[i];
4266   }
4267   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4268 
4269   /* set preallocation */
4270   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4271   if (!newisdense) {
4272     PetscInt *new_local_nnz=0;
4273 
4274     ptr_vals = recv_buffer_vals;
4275     ptr_idxs = recv_buffer_idxs_local;
4276     if (n_recvs) {
4277       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4278     }
4279     for (i=0;i<n_recvs;i++) {
4280       PetscInt j;
4281       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4282         for (j=0;j<*(ptr_idxs+1);j++) {
4283           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4284         }
4285       } else {
4286         /* TODO */
4287       }
4288       ptr_idxs += olengths_idxs[i];
4289     }
4290     if (new_local_nnz) {
4291       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4292       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4293       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4294       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4295       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4296       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4297     } else {
4298       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4299     }
4300     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4301   } else {
4302     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4303   }
4304 
4305   /* set values */
4306   ptr_vals = recv_buffer_vals;
4307   ptr_idxs = recv_buffer_idxs_local;
4308   for (i=0;i<n_recvs;i++) {
4309     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4310       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4311       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4312       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4313       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4314       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4315     } else {
4316       /* TODO */
4317     }
4318     ptr_idxs += olengths_idxs[i];
4319     ptr_vals += olengths_vals[i];
4320   }
4321   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4322   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4323   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4324   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4325   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4326   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4327 
4328 #if 0
4329   if (!restrict_comm) { /* check */
4330     Vec       lvec,rvec;
4331     PetscReal infty_error;
4332 
4333     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4334     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4335     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4336     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4337     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4338     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4339     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4340     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4341     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4342   }
4343 #endif
4344 
4345   /* assemble new additional is (if any) */
4346   if (nis) {
4347     PetscInt **temp_idxs,*count_is,j,psum;
4348 
4349     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4350     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4351     ptr_idxs = recv_buffer_idxs_is;
4352     psum = 0;
4353     for (i=0;i<n_recvs;i++) {
4354       for (j=0;j<nis;j++) {
4355         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4356         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4357         psum += plen;
4358         ptr_idxs += plen+1; /* shift pointer to received data */
4359       }
4360     }
4361     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4362     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4363     for (i=1;i<nis;i++) {
4364       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4365     }
4366     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4367     ptr_idxs = recv_buffer_idxs_is;
4368     for (i=0;i<n_recvs;i++) {
4369       for (j=0;j<nis;j++) {
4370         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4371         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4372         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4373         ptr_idxs += plen+1; /* shift pointer to received data */
4374       }
4375     }
4376     for (i=0;i<nis;i++) {
4377       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4378       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4379       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4380     }
4381     ierr = PetscFree(count_is);CHKERRQ(ierr);
4382     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4383     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4384   }
4385   /* free workspace */
4386   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4387   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4388   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4389   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4390   if (isdense) {
4391     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4392     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4393   } else {
4394     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4395   }
4396   if (nis) {
4397     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4398     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4399   }
4400   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4401   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4402   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4403   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4404   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4405   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4406   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4407   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4408   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4409   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4410   ierr = PetscFree(onodes);CHKERRQ(ierr);
4411   if (nis) {
4412     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4413     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4414     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4415   }
4416   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4417   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4418     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4419     for (i=0;i<nis;i++) {
4420       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4421     }
4422     *mat_n = NULL;
4423   }
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 /* temporary hack into ksp private data structure */
4428 #include <petsc/private/kspimpl.h>
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4432 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4433 {
4434   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4435   PC_IS                  *pcis = (PC_IS*)pc->data;
4436   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4437   MatNullSpace           CoarseNullSpace=NULL;
4438   ISLocalToGlobalMapping coarse_islg;
4439   IS                     coarse_is,*isarray;
4440   PetscInt               i,im_active=-1,active_procs=-1;
4441   PetscInt               nis,nisdofs,nisneu;
4442   PC                     pc_temp;
4443   PCType                 coarse_pc_type;
4444   KSPType                coarse_ksp_type;
4445   PetscBool              multilevel_requested,multilevel_allowed;
4446   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4447   Mat                    t_coarse_mat_is;
4448   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4449   PetscMPIInt            all_procs;
4450   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4451   PetscBool              compute_vecs = PETSC_FALSE;
4452   PetscScalar            *array;
4453   PetscErrorCode         ierr;
4454 
4455   PetscFunctionBegin;
4456   /* Assign global numbering to coarse dofs */
4457   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4458     PetscInt ocoarse_size;
4459     compute_vecs = PETSC_TRUE;
4460     ocoarse_size = pcbddc->coarse_size;
4461     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4462     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4463     /* see if we can avoid some work */
4464     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4465       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4466       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4467         PC        pc;
4468         PetscBool isbddc;
4469 
4470         /* temporary workaround since PCBDDC does not have a reset method so far */
4471         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4472         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4473         if (isbddc) {
4474           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4475         }
4476         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4477         coarse_reuse = PETSC_FALSE;
4478       } else { /* we can safely reuse already computed coarse matrix */
4479         coarse_reuse = PETSC_TRUE;
4480       }
4481     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4482       coarse_reuse = PETSC_FALSE;
4483     }
4484     /* reset any subassembling information */
4485     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4486     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4487   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4488     coarse_reuse = PETSC_TRUE;
4489   }
4490 
4491   /* count "active" (i.e. with positive local size) and "void" processes */
4492   im_active = !!(pcis->n);
4493   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4494   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4495   void_procs = all_procs-active_procs;
4496   csin_type_simple = PETSC_TRUE;
4497   redist = PETSC_FALSE;
4498   if (pcbddc->current_level && void_procs) {
4499     csin_ml = PETSC_TRUE;
4500     ncoarse_ml = void_procs;
4501     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4502     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4503       csin_ds = PETSC_TRUE;
4504       ncoarse_ds = pcbddc->redistribute_coarse;
4505       redist = PETSC_TRUE;
4506     } else {
4507       csin_ds = PETSC_TRUE;
4508       ncoarse_ds = active_procs;
4509       redist = PETSC_TRUE;
4510     }
4511   } else {
4512     csin_ml = PETSC_FALSE;
4513     ncoarse_ml = all_procs;
4514     if (void_procs) {
4515       csin_ds = PETSC_TRUE;
4516       ncoarse_ds = void_procs;
4517       csin_type_simple = PETSC_FALSE;
4518     } else {
4519       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4520         csin_ds = PETSC_TRUE;
4521         ncoarse_ds = pcbddc->redistribute_coarse;
4522         redist = PETSC_TRUE;
4523       } else {
4524         csin_ds = PETSC_FALSE;
4525         ncoarse_ds = all_procs;
4526       }
4527     }
4528   }
4529 
4530   /*
4531     test if we can go multilevel: three conditions must be satisfied:
4532     - we have not exceeded the number of levels requested
4533     - we can actually subassemble the active processes
4534     - we can find a suitable number of MPI processes where we can place the subassembled problem
4535   */
4536   multilevel_allowed = PETSC_FALSE;
4537   multilevel_requested = PETSC_FALSE;
4538   if (pcbddc->current_level < pcbddc->max_levels) {
4539     multilevel_requested = PETSC_TRUE;
4540     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4541       multilevel_allowed = PETSC_FALSE;
4542     } else {
4543       multilevel_allowed = PETSC_TRUE;
4544     }
4545   }
4546   /* determine number of process partecipating to coarse solver */
4547   if (multilevel_allowed) {
4548     ncoarse = ncoarse_ml;
4549     csin = csin_ml;
4550     redist = PETSC_FALSE;
4551   } else {
4552     ncoarse = ncoarse_ds;
4553     csin = csin_ds;
4554   }
4555 
4556   /* creates temporary l2gmap and IS for coarse indexes */
4557   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4558   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4559 
4560   /* creates temporary MATIS object for coarse matrix */
4561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4562   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4563   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4564   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4565 #if 0
4566   {
4567     PetscViewer viewer;
4568     char filename[256];
4569     sprintf(filename,"local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4570     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4571     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4572     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4573     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4574   }
4575 #endif
4576   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4577   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4578   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4579   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4580   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4581 
4582   /* compute dofs splitting and neumann boundaries for coarse dofs */
4583   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4584     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4585     const PetscInt         *idxs;
4586     ISLocalToGlobalMapping tmap;
4587 
4588     /* create map between primal indices (in local representative ordering) and local primal numbering */
4589     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4590     /* allocate space for temporary storage */
4591     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4592     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4593     /* allocate for IS array */
4594     nisdofs = pcbddc->n_ISForDofsLocal;
4595     nisneu = !!pcbddc->NeumannBoundariesLocal;
4596     nis = nisdofs + nisneu;
4597     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4598     /* dofs splitting */
4599     for (i=0;i<nisdofs;i++) {
4600       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4601       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4602       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4603       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4604       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4605       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4606       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4607       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4608     }
4609     /* neumann boundaries */
4610     if (pcbddc->NeumannBoundariesLocal) {
4611       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4612       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4613       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4614       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4615       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4616       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4617       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4618       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4619     }
4620     /* free memory */
4621     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4622     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4623     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4624   } else {
4625     nis = 0;
4626     nisdofs = 0;
4627     nisneu = 0;
4628     isarray = NULL;
4629   }
4630   /* destroy no longer needed map */
4631   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4632 
4633   /* restrict on coarse candidates (if needed) */
4634   coarse_mat_is = NULL;
4635   if (csin) {
4636     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4637       if (redist) {
4638         PetscMPIInt rank;
4639         PetscInt    spc,n_spc_p1,dest[1],destsize;
4640 
4641         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4642         spc = active_procs/ncoarse;
4643         n_spc_p1 = active_procs%ncoarse;
4644         if (im_active) {
4645           destsize = 1;
4646           if (rank > n_spc_p1*(spc+1)-1) {
4647             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4648           } else {
4649             dest[0] = rank/(spc+1);
4650           }
4651         } else {
4652           destsize = 0;
4653         }
4654         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4655       } else if (csin_type_simple) {
4656         PetscMPIInt rank;
4657         PetscInt    issize,isidx;
4658 
4659         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4660         if (im_active) {
4661           issize = 1;
4662           isidx = (PetscInt)rank;
4663         } else {
4664           issize = 0;
4665           isidx = -1;
4666         }
4667         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4668       } else { /* get a suitable subassembling pattern from MATIS code */
4669         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4670       }
4671 
4672       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4673       if (!redist || ncoarse <= void_procs) {
4674         PetscInt ncoarse_cand,tissize,*nisindices;
4675         PetscInt *coarse_candidates;
4676         const PetscInt* tisindices;
4677 
4678         /* get coarse candidates' ranks in pc communicator */
4679         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4680         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4681         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4682           if (!coarse_candidates[i]) {
4683             coarse_candidates[ncoarse_cand++]=i;
4684           }
4685         }
4686         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4687 
4688 
4689         if (pcbddc->dbg_flag) {
4690           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4691           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4692           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4693           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4694           for (i=0;i<ncoarse_cand;i++) {
4695             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4696           }
4697           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4698           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4699         }
4700         /* shift the pattern on coarse candidates */
4701         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4702         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4703         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4704         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4705         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4706         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4707         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4708       }
4709       if (pcbddc->dbg_flag) {
4710         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4711         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4712         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4713         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4714       }
4715     }
4716     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4717     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4718       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4719     } else { /* this is the last level, so use just receiving processes in subcomm */
4720       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4721     }
4722   } else {
4723     if (pcbddc->dbg_flag) {
4724       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4725       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4726       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4727     }
4728     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4729     coarse_mat_is = t_coarse_mat_is;
4730   }
4731 
4732   /* create local to global scatters for coarse problem */
4733   if (compute_vecs) {
4734     PetscInt lrows;
4735     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4736     if (coarse_mat_is) {
4737       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4738     } else {
4739       lrows = 0;
4740     }
4741     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4742     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4743     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4744     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4745     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4746   }
4747   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4748   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4749 
4750   /* set defaults for coarse KSP and PC */
4751   if (multilevel_allowed) {
4752     coarse_ksp_type = KSPRICHARDSON;
4753     coarse_pc_type = PCBDDC;
4754   } else {
4755     coarse_ksp_type = KSPPREONLY;
4756     coarse_pc_type = PCREDUNDANT;
4757   }
4758 
4759   /* print some info if requested */
4760   if (pcbddc->dbg_flag) {
4761     if (!multilevel_allowed) {
4762       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4763       if (multilevel_requested) {
4764         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4765       } else if (pcbddc->max_levels) {
4766         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4767       }
4768       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4769     }
4770   }
4771 
4772   /* create the coarse KSP object only once with defaults */
4773   if (coarse_mat_is) {
4774     MatReuse coarse_mat_reuse;
4775     PetscViewer dbg_viewer = NULL;
4776     if (pcbddc->dbg_flag) {
4777       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4778       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4779     }
4780     if (!pcbddc->coarse_ksp) {
4781       char prefix[256],str_level[16];
4782       size_t len;
4783       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4784       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4785       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4786       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4787       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4788       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4789       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4790       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4791       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4792       /* prefix */
4793       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4794       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4795       if (!pcbddc->current_level) {
4796         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4797         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4798       } else {
4799         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4800         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4801         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4802         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4803         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4804         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4805       }
4806       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4807       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4808       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4809       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4810       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4811       /* allow user customization */
4812       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4813     }
4814     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4815     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4816     if (nisdofs) {
4817       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4818       for (i=0;i<nisdofs;i++) {
4819         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4820       }
4821     }
4822     if (nisneu) {
4823       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4824       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4825     }
4826 
4827     /* get some info after set from options */
4828     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4829     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4830     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4831     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4832       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4833       isbddc = PETSC_FALSE;
4834     }
4835     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4836     if (isredundant) {
4837       KSP inner_ksp;
4838       PC  inner_pc;
4839       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4840       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4841       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4842     }
4843 
4844     /* assemble coarse matrix */
4845     if (coarse_reuse) {
4846       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4847       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4848       coarse_mat_reuse = MAT_REUSE_MATRIX;
4849     } else {
4850       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4851     }
4852     if (isbddc || isnn) {
4853       if (pcbddc->coarsening_ratio > 1) {
4854         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4855           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4856           if (pcbddc->dbg_flag) {
4857             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4858             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4859             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4860             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4861           }
4862         }
4863         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4864       } else {
4865         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4866         coarse_mat = coarse_mat_is;
4867       }
4868     } else {
4869       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4870     }
4871     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4872 
4873     /* propagate symmetry info of coarse matrix */
4874     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4875     if (pc->pmat->symmetric_set) {
4876       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4877     }
4878     if (pc->pmat->hermitian_set) {
4879       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4880     }
4881     if (pc->pmat->spd_set) {
4882       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4883     }
4884     /* set operators */
4885     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4886     if (pcbddc->dbg_flag) {
4887       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4888     }
4889   } else { /* processes non partecipating to coarse solver (if any) */
4890     coarse_mat = 0;
4891   }
4892   ierr = PetscFree(isarray);CHKERRQ(ierr);
4893 #if 0
4894   {
4895     PetscViewer viewer;
4896     char filename[256];
4897     sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
4898     ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr);
4899     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4900     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4901     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4902   }
4903 #endif
4904 
4905   /* Compute coarse null space (special handling by BDDC only) */
4906 #if 0
4907   if (pcbddc->NullSpace) {
4908     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4909   }
4910 #endif
4911   /* hack */
4912   if (pcbddc->coarse_ksp) {
4913     Vec crhs,csol;
4914 
4915     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4916     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4917     if (!csol) {
4918       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4919     }
4920     if (!crhs) {
4921       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4922     }
4923   }
4924 
4925   /* compute null space for coarse solver if the benign trick has been requested */
4926   if (pcbddc->benign_null) {
4927 
4928     ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr);
4929     ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-1,1.0,INSERT_VALUES);CHKERRQ(ierr);
4930     ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
4931     ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
4932     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4933     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4934     if (coarse_mat) {
4935       Vec         nullv;
4936       PetscScalar *array,*array2;
4937       PetscInt    nl;
4938 
4939       ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr);
4940       ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr);
4941       ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
4942       ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr);
4943       ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr);
4944       ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr);
4945       ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
4946       ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr);
4947       ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr);
4948       ierr = VecDestroy(&nullv);CHKERRQ(ierr);
4949     }
4950   }
4951 
4952   if (pcbddc->coarse_ksp) {
4953     PetscBool ispreonly;
4954 
4955     if (CoarseNullSpace) {
4956       PetscBool isnull;
4957       ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr);
4958       if (isnull) {
4959         if (isbddc) {
4960           ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4961         } else {
4962           ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4963         }
4964       } else {
4965         ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4966       }
4967     }
4968     /* setup coarse ksp */
4969     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4970     /* Check coarse problem if in debug mode or if solving with an iterative method */
4971     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4972     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4973       KSP       check_ksp;
4974       KSPType   check_ksp_type;
4975       PC        check_pc;
4976       Vec       check_vec,coarse_vec;
4977       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4978       PetscInt  its;
4979       PetscBool compute_eigs;
4980       PetscReal *eigs_r,*eigs_c;
4981       PetscInt  neigs;
4982       const char *prefix;
4983 
4984       /* Create ksp object suitable for estimation of extreme eigenvalues */
4985       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4986       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4987       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4988       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4989       if (ispreonly) {
4990         check_ksp_type = KSPPREONLY;
4991         compute_eigs = PETSC_FALSE;
4992       } else {
4993         check_ksp_type = KSPGMRES;
4994         compute_eigs = PETSC_TRUE;
4995       }
4996       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4997       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4998       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4999       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
5000       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
5001       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
5002       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
5003       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
5004       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
5005       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
5006       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
5007       /* create random vec */
5008       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
5009       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
5010       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
5011       if (CoarseNullSpace) {
5012         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
5013       }
5014       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5015       /* solve coarse problem */
5016       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
5017       if (CoarseNullSpace) {
5018         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
5019       }
5020       /* set eigenvalue estimation if preonly has not been requested */
5021       if (compute_eigs) {
5022         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
5023         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
5024         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
5025         lambda_max = eigs_r[neigs-1];
5026         lambda_min = eigs_r[0];
5027         if (pcbddc->use_coarse_estimates) {
5028           if (lambda_max>lambda_min) {
5029             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
5030             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
5031           }
5032         }
5033       }
5034 
5035       /* check coarse problem residual error */
5036       if (pcbddc->dbg_flag) {
5037         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
5038         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5039         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
5040         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
5041         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
5042         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
5043         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
5044         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
5045         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
5046         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
5047         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
5048         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
5049         if (CoarseNullSpace) {
5050           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr);
5051         }
5052         if (compute_eigs) {
5053           PetscReal lambda_max_s,lambda_min_s;
5054           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
5055           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
5056           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
5057           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
5058           for (i=0;i<neigs;i++) {
5059             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
5060           }
5061         }
5062         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
5063         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
5064       }
5065       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
5066       if (compute_eigs) {
5067         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
5068         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
5069       }
5070     }
5071   }
5072   /* print additional info */
5073   if (pcbddc->dbg_flag) {
5074     /* waits until all processes reaches this point */
5075     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
5076     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
5077     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5078   }
5079 
5080   /* free memory */
5081   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
5082   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
5083   PetscFunctionReturn(0);
5084 }
5085 
5086 #undef __FUNCT__
5087 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
5088 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
5089 {
5090   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
5091   PC_IS*         pcis = (PC_IS*)pc->data;
5092   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
5093   IS             subset,subset_mult,subset_n;
5094   PetscInt       local_size,coarse_size=0;
5095   PetscInt       *local_primal_indices=NULL;
5096   const PetscInt *t_local_primal_indices;
5097   PetscErrorCode ierr;
5098 
5099   PetscFunctionBegin;
5100   /* Compute global number of coarse dofs */
5101   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
5102     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
5103   }
5104   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
5105   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
5106   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5107   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
5108   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
5109   ierr = ISDestroy(&subset);CHKERRQ(ierr);
5110   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
5111   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
5112   if (local_size != pcbddc->local_primal_size) {
5113     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
5114   }
5115   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
5116   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5117   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
5118   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
5119   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
5120 
5121   /* check numbering */
5122   if (pcbddc->dbg_flag) {
5123     PetscScalar coarsesum,*array,*array2;
5124     PetscInt    i;
5125     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
5126 
5127     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5128     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
5129     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
5130     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
5131     /* counter */
5132     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5133     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
5134     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5135     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5136     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5137     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5138 
5139     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
5140     for (i=0;i<pcbddc->local_primal_size;i++) {
5141       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
5142     }
5143     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5144     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5145     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5146     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5147     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5148     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5149     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5150     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5151     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5152     for (i=0;i<pcis->n;i++) {
5153       if (array[i] != 0.0 && array[i] != array2[i]) {
5154         PetscInt owned = (PetscInt)PetscRealPart(array[i]);
5155         PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
5156         set_error = PETSC_TRUE;
5157         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
5158       }
5159     }
5160     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
5161     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
5162     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5163     for (i=0;i<pcis->n;i++) {
5164       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
5165     }
5166     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5167     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
5168     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5169     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5170     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
5171     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
5172     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
5173       PetscInt *gidxs;
5174 
5175       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
5176       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
5177       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
5178       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5179       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
5180       for (i=0;i<pcbddc->local_primal_size;i++) {
5181         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
5182       }
5183       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5184       ierr = PetscFree(gidxs);CHKERRQ(ierr);
5185     }
5186     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
5187     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
5188   }
5189   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
5190   /* get back data */
5191   *coarse_size_n = coarse_size;
5192   *local_primal_indices_n = local_primal_indices;
5193   PetscFunctionReturn(0);
5194 }
5195 
5196 #undef __FUNCT__
5197 #define __FUNCT__ "PCBDDCGlobalToLocal"
5198 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
5199 {
5200   IS             localis_t;
5201   PetscInt       i,lsize,*idxs,n;
5202   PetscScalar    *vals;
5203   PetscErrorCode ierr;
5204 
5205   PetscFunctionBegin;
5206   /* get indices in local ordering exploiting local to global map */
5207   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
5208   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
5209   for (i=0;i<lsize;i++) vals[i] = 1.0;
5210   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5211   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
5212   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
5213   if (idxs) { /* multilevel guard */
5214     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
5215   }
5216   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
5217   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
5218   ierr = PetscFree(vals);CHKERRQ(ierr);
5219   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
5220   /* now compute set in local ordering */
5221   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5222   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5223   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5224   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
5225   for (i=0,lsize=0;i<n;i++) {
5226     if (PetscRealPart(vals[i]) > 0.5) {
5227       lsize++;
5228     }
5229   }
5230   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
5231   for (i=0,lsize=0;i<n;i++) {
5232     if (PetscRealPart(vals[i]) > 0.5) {
5233       idxs[lsize++] = i;
5234     }
5235   }
5236   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
5237   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
5238   *localis = localis_t;
5239   PetscFunctionReturn(0);
5240 }
5241 
5242 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
5243 #undef __FUNCT__
5244 #define __FUNCT__ "PCBDDCMatMult_Private"
5245 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
5246 {
5247   PCBDDCChange_ctx change_ctx;
5248   PetscErrorCode   ierr;
5249 
5250   PetscFunctionBegin;
5251   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5252   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5253   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5254   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5255   PetscFunctionReturn(0);
5256 }
5257 
5258 #undef __FUNCT__
5259 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5260 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5261 {
5262   PCBDDCChange_ctx change_ctx;
5263   PetscErrorCode   ierr;
5264 
5265   PetscFunctionBegin;
5266   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5267   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5268   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5269   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5270   PetscFunctionReturn(0);
5271 }
5272 
5273 #undef __FUNCT__
5274 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5275 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5276 {
5277   PC_IS               *pcis=(PC_IS*)pc->data;
5278   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5279   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5280   Mat                 S_j;
5281   PetscInt            *used_xadj,*used_adjncy;
5282   PetscBool           free_used_adj;
5283   PetscErrorCode      ierr;
5284 
5285   PetscFunctionBegin;
5286   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5287   free_used_adj = PETSC_FALSE;
5288   if (pcbddc->sub_schurs_layers == -1) {
5289     used_xadj = NULL;
5290     used_adjncy = NULL;
5291   } else {
5292     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5293       used_xadj = pcbddc->mat_graph->xadj;
5294       used_adjncy = pcbddc->mat_graph->adjncy;
5295     } else if (pcbddc->computed_rowadj) {
5296       used_xadj = pcbddc->mat_graph->xadj;
5297       used_adjncy = pcbddc->mat_graph->adjncy;
5298     } else {
5299       PetscBool      flg_row=PETSC_FALSE;
5300       const PetscInt *xadj,*adjncy;
5301       PetscInt       nvtxs;
5302 
5303       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5304       if (flg_row) {
5305         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5306         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5307         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5308         free_used_adj = PETSC_TRUE;
5309       } else {
5310         pcbddc->sub_schurs_layers = -1;
5311         used_xadj = NULL;
5312         used_adjncy = NULL;
5313       }
5314       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5315     }
5316   }
5317 
5318   /* setup sub_schurs data */
5319   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5320   if (!sub_schurs->use_mumps) {
5321     /* pcbddc->ksp_D up to date only if not using MUMPS */
5322     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5323     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5324   } else {
5325     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5326     PetscBool isseqaij;
5327     if (!pcbddc->use_vertices && reuse_solvers) {
5328       PetscInt n_vertices;
5329 
5330       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5331       reuse_solvers = (PetscBool)!n_vertices;
5332     }
5333     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5334     if (!isseqaij) {
5335       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5336       if (matis->A == pcbddc->local_mat) {
5337         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5338         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5339       } else {
5340         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5341       }
5342     }
5343     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5344   }
5345   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5346 
5347   /* free adjacency */
5348   if (free_used_adj) {
5349     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5350   }
5351   PetscFunctionReturn(0);
5352 }
5353 
5354 #undef __FUNCT__
5355 #define __FUNCT__ "PCBDDCInitSubSchurs"
5356 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5357 {
5358   PC_IS               *pcis=(PC_IS*)pc->data;
5359   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5360   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5361   PCBDDCGraph         graph;
5362   PetscErrorCode      ierr;
5363 
5364   PetscFunctionBegin;
5365   /* attach interface graph for determining subsets */
5366   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5367     IS       verticesIS,verticescomm;
5368     PetscInt vsize,*idxs;
5369 
5370     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5371     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5372     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5373     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5374     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5375     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5376     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5377     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5378     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5379     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5380     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5381 /*
5382     if (pcbddc->dbg_flag) {
5383       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5384     }
5385 */
5386   } else {
5387     graph = pcbddc->mat_graph;
5388   }
5389 
5390   /* sub_schurs init */
5391   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5392 
5393   /* free graph struct */
5394   if (pcbddc->sub_schurs_rebuild) {
5395     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5396   }
5397   PetscFunctionReturn(0);
5398 }
5399 
5400 #undef __FUNCT__
5401 #define __FUNCT__ "PCBDDCCheckOperator"
5402 PetscErrorCode PCBDDCCheckOperator(PC pc)
5403 {
5404   PC_IS               *pcis=(PC_IS*)pc->data;
5405   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5406   PetscErrorCode      ierr;
5407 
5408   PetscFunctionBegin;
5409   if (pcbddc->n_vertices == pcbddc->local_primal_size) {
5410     IS             zerodiag = NULL;
5411     Mat            S_j,B0=NULL,B0_B=NULL;
5412     Vec            dummy_vec=NULL,vec_check_B,vec_scale_P;
5413     PetscScalar    p0_check,*array,*array2;
5414     PetscReal      norm;
5415     PetscInt       i;
5416 
5417     /* B0 and B0_B */
5418     if (zerodiag) {
5419       IS       dummy;
5420       PetscInt ii[2];
5421 
5422       ii[0] = 0;
5423       ii[1] = pcbddc->B0_ncol;
5424       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
5425       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
5426       ierr = MatGetSubMatrix(B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr);
5427       ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr);
5428       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
5429     }
5430     /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
5431     ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr);
5432     ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr);
5433     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5434     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5435     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5436     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5437     ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr);
5438     /* S_j */
5439     ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5440     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5441 
5442     /* mimic vector in \widetilde{W}_\Gamma */
5443     ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr);
5444     /* continuous in primal space */
5445     ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr);
5446     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5447     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5448     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5449     if (zerodiag) {
5450       p0_check = array[pcbddc->local_primal_size-1];
5451     } else {
5452       p0_check = 0;
5453     }
5454     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5455     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5456     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5457     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5458     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5459     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5460     ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr);
5461     ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr);
5462 
5463     /* assemble rhs for coarse problem */
5464     /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
5465     /* local with Schur */
5466     ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr);
5467     if (zerodiag) {
5468       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5469       array[0] = p0_check;
5470       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5471       ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
5472     }
5473     /* sum on primal nodes the local contributions */
5474     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5475     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5476     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5477     ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5478     for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
5479     ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr);
5480     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
5481     ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr);
5482     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5483     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5484     ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5485     ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
5486     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5487     /* scale primal nodes (BDDC sums contibutions) */
5488     ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr);
5489     ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr);
5490     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
5491     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
5492     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
5493     ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5494     ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
5495     /* global: \widetilde{B0}_B w_\Gamma */
5496     if (zerodiag) {
5497       ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr);
5498       ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr);
5499       pcbddc->benign_p0 = array[0];
5500       ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr);
5501     } else {
5502       pcbddc->benign_p0 = 0.;
5503     }
5504     /* BDDC */
5505     ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr);
5506     ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr);
5507 
5508     ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
5509     ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr);
5510     ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr);
5511     PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
5512     if (pcbddc->benign_p0_lidx >= 0) {
5513       PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0 error is %1.4e\n",PetscGlobalRank,PetscAbsScalar(pcbddc->benign_p0-p0_check));
5514     }
5515 
5516     ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr);
5517     ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr);
5518     ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr);
5519     ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5520     ierr = MatDestroy(&B0);CHKERRQ(ierr);
5521     ierr = MatDestroy(&B0_B);CHKERRQ(ierr);
5522   }
5523   PetscFunctionReturn(0);
5524 }
5525