xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision efc2fbd988ffb32ec2d0f2ce2df54086b651f77c)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0"
10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   if (!pcbddc->benign_sf) {
17     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
18     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
19   }
20   if (pop) { /* use SF to get values */
21     PetscScalar *array;
22 
23     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
24     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
25     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
26     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
27   } else { /* use VecSetValue */
28     ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
29     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
30     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
37 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
38 {
39   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
40   PetscErrorCode ierr;
41 
42   PetscFunctionBegin;
43   /* TODO: add error checking
44     - avoid nested pop (or push) calls.
45     - cannot push before pop.
46   */
47   if (!pcbddc->zerodiag) {
48     PetscFunctionReturn(0);
49   }
50   if (pop) {
51     const PetscInt    *cB0_cols;
52     PetscInt          cB0_ncol;
53     const PetscScalar *cB0_vals;
54 
55     /* extract B_0 */
56     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
57     pcbddc->B0_ncol = cB0_ncol;
58     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
59     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
60     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
61     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
62     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
63     /* remove rows and cols from local problem */
64     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
65     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
66   } else { /* push */
67     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
68     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
69     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
70     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
71     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "PCBDDCAdaptiveSelection"
78 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
79 {
80   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
81   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
82   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
83   PetscBLASInt    *B_iwork,*B_ifail;
84   PetscScalar     *work,lwork;
85   PetscScalar     *St,*S,*eigv;
86   PetscScalar     *Sarray,*Starray;
87   PetscReal       *eigs,thresh;
88   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
89   PetscBool       allocated_S_St;
90 #if defined(PETSC_USE_COMPLEX)
91   PetscReal       *rwork;
92 #endif
93   PetscErrorCode  ierr;
94 
95   PetscFunctionBegin;
96   if (!sub_schurs->use_mumps) {
97     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
98   }
99 
100   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
101     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
102   }
103 
104   if (pcbddc->dbg_flag) {
105     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
108     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
109   }
110 
111   if (pcbddc->dbg_flag) {
112     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
113   }
114 
115   /* max size of subsets */
116   mss = 0;
117   for (i=0;i<sub_schurs->n_subs;i++) {
118     PetscInt subset_size;
119 
120     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
121     mss = PetscMax(mss,subset_size);
122   }
123 
124   /* min/max and threshold */
125   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
126   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
127   nmax = PetscMax(nmin,nmax);
128   allocated_S_St = PETSC_FALSE;
129   if (nmin) {
130     allocated_S_St = PETSC_TRUE;
131   }
132 
133   /* allocate lapack workspace */
134   cum = cum2 = 0;
135   maxneigs = 0;
136   for (i=0;i<sub_schurs->n_subs;i++) {
137     PetscInt n,subset_size;
138 
139     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
140     n = PetscMin(subset_size,nmax);
141     cum += subset_size;
142     cum2 += subset_size*n;
143     maxneigs = PetscMax(maxneigs,n);
144   }
145   if (mss) {
146     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
147       PetscBLASInt B_itype = 1;
148       PetscBLASInt B_N = mss;
149       PetscReal    zero = 0.0;
150       PetscReal    eps = 0.0; /* dlamch? */
151 
152       B_lwork = -1;
153       S = NULL;
154       St = NULL;
155       eigs = NULL;
156       eigv = NULL;
157       B_iwork = NULL;
158       B_ifail = NULL;
159 #if defined(PETSC_USE_COMPLEX)
160       rwork = NULL;
161 #endif
162       thresh = 1.0;
163       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
164 #if defined(PETSC_USE_COMPLEX)
165       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
166 #else
167       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
168 #endif
169       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
170       ierr = PetscFPTrapPop();CHKERRQ(ierr);
171     } else {
172         /* TODO */
173     }
174   } else {
175     lwork = 0;
176   }
177 
178   nv = 0;
179   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
180     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
181   }
182   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
183   if (allocated_S_St) {
184     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
185   }
186   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
187 #if defined(PETSC_USE_COMPLEX)
188   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
189 #endif
190   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
191                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
192                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
193                       nv+cum,&pcbddc->adaptive_constraints_idxs,
194                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
195   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
196 
197   maxneigs = 0;
198   cum = cum2 = cumarray = 0;
199   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
200   pcbddc->adaptive_constraints_data_ptr[0] = 0;
201   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
202     const PetscInt *idxs;
203 
204     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
205     for (cum=0;cum<nv;cum++) {
206       pcbddc->adaptive_constraints_n[cum] = 1;
207       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
208       pcbddc->adaptive_constraints_data[cum] = 1.0;
209       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
210       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
211     }
212     cum2 = cum;
213     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
214   }
215 
216   if (mss) { /* multilevel */
217     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
218     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
219   }
220 
221   for (i=0;i<sub_schurs->n_subs;i++) {
222 
223     const PetscInt *idxs;
224     PetscReal      infty = PETSC_MAX_REAL;
225     PetscInt       j,subset_size,eigs_start = 0;
226     PetscBLASInt   B_N;
227     PetscBool      same_data = PETSC_FALSE;
228 
229     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
230     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
231     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
232       if (sub_schurs->is_hermitian) {
233         PetscInt j,k;
234         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
235           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237         }
238         for (j=0;j<subset_size;j++) {
239           for (k=j;k<subset_size;k++) {
240             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
241             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
242           }
243         }
244       } else {
245         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
246         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
247       }
248     } else {
249       S = Sarray + cumarray;
250       St = Starray + cumarray;
251     }
252 
253     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
254     /* see if we can save some work */
255     if (sub_schurs->n_subs == 1) {
256       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
257     }
258 
259     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
260       B_neigs = 0;
261     } else {
262       /* Threshold: this is an heuristic for edges */
263       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
264 
265       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
266         PetscBLASInt B_itype = 1;
267         PetscBLASInt B_IL, B_IU;
268         PetscReal    eps = -1.0; /* dlamch? */
269         PetscInt     nmin_s;
270 
271         /* ask for eigenvalues larger than thresh */
272         if (pcbddc->dbg_flag) {
273           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
274         }
275         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
276 #if defined(PETSC_USE_COMPLEX)
277         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
278 #else
279         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
280 #endif
281         ierr = PetscFPTrapPop();CHKERRQ(ierr);
282         if (B_ierr) {
283           if (B_ierr < 0 ) {
284             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
285           } else if (B_ierr <= B_N) {
286             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
287           } else {
288             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
289           }
290         }
291 
292         if (B_neigs > nmax) {
293           if (pcbddc->dbg_flag) {
294             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
295           }
296           eigs_start = B_neigs -nmax;
297           B_neigs = nmax;
298         }
299 
300         nmin_s = PetscMin(nmin,B_N);
301         if (B_neigs < nmin_s) {
302           PetscBLASInt B_neigs2;
303 
304           B_IU = B_N - B_neigs;
305           B_IL = B_N - nmin_s + 1;
306           if (pcbddc->dbg_flag) {
307             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
308           }
309           if (sub_schurs->is_hermitian) {
310             PetscInt j;
311             for (j=0;j<subset_size;j++) {
312               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
313             }
314             for (j=0;j<subset_size;j++) {
315               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
316             }
317           } else {
318             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
319             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
320           }
321           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
322 #if defined(PETSC_USE_COMPLEX)
323           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
324 #else
325           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
326 #endif
327           ierr = PetscFPTrapPop();CHKERRQ(ierr);
328           B_neigs += B_neigs2;
329         }
330         if (B_ierr) {
331           if (B_ierr < 0 ) {
332             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
333           } else if (B_ierr <= B_N) {
334             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
335           } else {
336             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
337           }
338         }
339         if (pcbddc->dbg_flag) {
340           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
341           for (j=0;j<B_neigs;j++) {
342             if (eigs[j] == 0.0) {
343               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
344             } else {
345               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
346             }
347           }
348         }
349       } else {
350           /* TODO */
351       }
352     }
353     maxneigs = PetscMax(B_neigs,maxneigs);
354     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
355     if (B_neigs) {
356       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358       if (pcbddc->dbg_flag > 1) {
359         PetscInt ii;
360         for (ii=0;ii<B_neigs;ii++) {
361           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
362           for (j=0;j<B_N;j++) {
363 #if defined(PETSC_USE_COMPLEX)
364             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
365             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
366             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
367 #else
368             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
369 #endif
370           }
371         }
372       }
373 #if 0
374       for (j=0;j<B_neigs;j++) {
375         PetscBLASInt Blas_N,Blas_one = 1.0;
376         PetscScalar norm;
377         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
378         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
379                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
380         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
381           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
382         } else {
383           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
384         }
385         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
386       }
387 #endif
388       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
389       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
390       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
391       cum++;
392     }
393     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
394     /* shift for next computation */
395     cumarray += subset_size*subset_size;
396   }
397   if (pcbddc->dbg_flag) {
398     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
399   }
400 
401   if (mss) {
402     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
403     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
404     /* destroy matrices (junk) */
405     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
406     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
407   }
408   if (allocated_S_St) {
409     ierr = PetscFree2(S,St);CHKERRQ(ierr);
410   }
411   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
412 #if defined(PETSC_USE_COMPLEX)
413   ierr = PetscFree(rwork);CHKERRQ(ierr);
414 #endif
415   if (pcbddc->dbg_flag) {
416     PetscInt maxneigs_r;
417     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
418     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "PCBDDCSetUpSolvers"
425 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
426 {
427   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
428   PetscScalar    *coarse_submat_vals;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   /* Setup local scatters R_to_B and (optionally) R_to_D */
433   /* PCBDDCSetUpLocalWorkVectors should be called first! */
434   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
435 
436   /* Setup local neumann solver ksp_R */
437   /* PCBDDCSetUpLocalScatters should be called first! */
438   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440   /* Change global null space passed in by the user if change of basis has been requested */
441   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
442     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
443   }
444 
445   /*
446      Setup local correction and local part of coarse basis.
447      Gives back the dense local part of the coarse matrix in column major ordering
448   */
449   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
450 
451   /* Compute total number of coarse nodes and setup coarse solver */
452   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
453 
454   /* free */
455   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCBDDCResetCustomization"
461 PetscErrorCode PCBDDCResetCustomization(PC pc)
462 {
463   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
468   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
469   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
470   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
471   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
472   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
473   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
474   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
475   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
476   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
477   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "PCBDDCResetTopography"
483 PetscErrorCode PCBDDCResetTopography(PC pc)
484 {
485   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
490   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
491   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
492   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
493   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "PCBDDCResetSolvers"
499 PetscErrorCode PCBDDCResetSolvers(PC pc)
500 {
501   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
502   PetscScalar    *array;
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
507   if (pcbddc->coarse_phi_B) {
508     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
509     ierr = PetscFree(array);CHKERRQ(ierr);
510   }
511   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
512   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
513   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
514   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
515   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
516   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
517   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
518   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
519   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
520   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
521   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
522   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
523   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
524   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
525   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
526   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
527   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
528   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
529   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
530   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
531   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
532   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
533   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
534   ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr);
535   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
536   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
542 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
543 {
544   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
545   PC_IS          *pcis = (PC_IS*)pc->data;
546   VecType        impVecType;
547   PetscInt       n_constraints,n_R,old_size,n_benign;
548   PetscErrorCode ierr;
549 
550   PetscFunctionBegin;
551   if (!pcbddc->ConstraintMatrix) {
552     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
553   }
554   /* get sizes */
555   n_benign = (PetscInt)(!!pcbddc->zerodiag);
556   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
557   if (n_benign && n_constraints) {
558     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis");
559   }
560   n_R = pcis->n - pcbddc->n_vertices;
561   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
562   /* local work vectors (try to avoid unneeded work)*/
563   /* R nodes */
564   old_size = -1;
565   if (pcbddc->vec1_R) {
566     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
567   }
568   if (n_R != old_size) {
569     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
570     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
571     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
572     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
573     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
574     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
575   }
576   /* local primal dofs */
577   old_size = -1;
578   if (pcbddc->vec1_P) {
579     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
580   }
581   if (pcbddc->local_primal_size != old_size) {
582     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
583     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
584     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
585     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
586   }
587   /* local explicit constraints */
588   old_size = -1;
589   if (pcbddc->vec1_C) {
590     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
591   }
592   if (n_constraints && n_constraints != old_size) {
593     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
594     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
595     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
596     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
597   }
598   PetscFunctionReturn(0);
599 }
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "PCBDDCSetUpCorrection"
603 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
604 {
605   PetscErrorCode  ierr;
606   /* pointers to pcis and pcbddc */
607   PC_IS*          pcis = (PC_IS*)pc->data;
608   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
609   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
610   /* submatrices of local problem */
611   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
612   /* submatrices of benign trick */
613   Mat             B0_R,B0_V;
614   /* submatrices of local coarse problem */
615   Mat             S_VV,S_CV,S_VC,S_CC;
616   /* working matrices */
617   Mat             C_CR;
618   /* additional working stuff */
619   PC              pc_R;
620   Mat             F;
621   Vec             vec_R_benign;
622   PetscBool       isLU,isCHOL,isILU;
623 
624   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
625   PetscScalar     *work;
626   PetscInt        *idx_V_B;
627   PetscInt        n,n_vertices,n_constraints,n_benign;
628   PetscInt        i,n_R,n_D,n_B;
629   PetscBool       unsymmetric_check;
630   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
631   MatType         impMatType;
632   /* some shortcuts to scalars */
633   PetscScalar     one=1.0,m_one=-1.0;
634 
635   PetscFunctionBegin;
636   n_benign = (PetscInt)(!!pcbddc->zerodiag);
637   n_vertices = pcbddc->n_vertices;
638   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
639   /* Set Non-overlapping dimensions */
640   n_B = pcis->n_B;
641   n_D = pcis->n - n_B;
642   n_R = pcis->n - n_vertices;
643 
644   /* Set types for local objects needed by BDDC precondtioner */
645   impMatType = MATSEQDENSE;
646 
647   /* vertices in boundary numbering */
648   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
649   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
650   if (i != n_vertices) {
651     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
652   }
653 
654   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
655   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
656   if (!n_benign) {
657     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
658     ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
659     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
660     ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
661     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
662     ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
663     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
664     ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
665   } else { /* these wrappers are not needed with the benign trick */
666     S_VV = S_CV = S_VC = S_CC = NULL;
667   }
668 
669   unsymmetric_check = PETSC_FALSE;
670   /* allocate workspace */
671   n = 0;
672   if (n_constraints) {
673     n += n_R*n_constraints;
674   }
675   if (n_vertices) {
676     n = PetscMax(2*n_R*n_vertices,n);
677     n = PetscMax((n_R+n_B)*n_vertices,n);
678   }
679   if (!pcbddc->symmetric_primal) {
680     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
681     unsymmetric_check = PETSC_TRUE;
682   }
683   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
684 
685   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
686   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
687   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
688   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
689   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
690   if (isLU || isILU || isCHOL) {
691     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
692   } else if (sub_schurs->reuse_mumps) {
693     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
694     MatFactorType type;
695 
696     F = reuse_mumps->F;
697     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
698     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
699   } else {
700     F = NULL;
701   }
702 
703   /* Precompute stuffs needed for preprocessing and application of BDDC*/
704   if (n_constraints) {
705     Mat         M1,M2,M3;
706     Mat         auxmat;
707     IS          is_aux;
708     PetscScalar *array,*array2;
709 
710     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
711     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
712 
713     /* Extract constraints on R nodes: C_{CR}  */
714     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
715     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
716     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
717 
718     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
719     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
720     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
721     for (i=0;i<n_constraints;i++) {
722       const PetscScalar *row_cmat_values;
723       const PetscInt    *row_cmat_indices;
724       PetscInt          size_of_constraint,j;
725 
726       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
727       for (j=0;j<size_of_constraint;j++) {
728         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
729       }
730       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
731     }
732     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
733     if (F) {
734       Mat B;
735 
736       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
737       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
738       ierr = MatDestroy(&B);CHKERRQ(ierr);
739     } else {
740       PetscScalar *marr;
741 
742       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
743       for (i=0;i<n_constraints;i++) {
744         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
745         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
746         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
747         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
748         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
749       }
750       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
751     }
752     if (!pcbddc->switch_static) {
753       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
754       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
755       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
756       for (i=0;i<n_constraints;i++) {
757         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
758         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
759         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
761         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
762         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
763       }
764       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
765       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
766       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
767     } else {
768       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
769       pcbddc->local_auxmat2 = local_auxmat2_R;
770       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
771     }
772     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
773     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
774     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
775     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
776     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
777     if (isCHOL) {
778       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
779     } else {
780       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
781     }
782     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
783     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
784     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
785     ierr = MatDestroy(&M2);CHKERRQ(ierr);
786     ierr = MatDestroy(&M3);CHKERRQ(ierr);
787     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
788     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
789     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
790     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
791     ierr = MatDestroy(&M1);CHKERRQ(ierr);
792   }
793   /* Get submatrices from subdomain matrix */
794   if (n_vertices) {
795     IS is_aux;
796 
797     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
798       IS tis;
799 
800       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
801       ierr = ISSort(tis);CHKERRQ(ierr);
802       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
803       ierr = ISDestroy(&tis);CHKERRQ(ierr);
804     } else {
805       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
806     }
807     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
808     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
809     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
810     if (n_benign) {
811       Mat      B0;
812       IS       dummy;
813       PetscInt ii[2];
814 
815       ii[0] = 0;
816       ii[1] = pcbddc->B0_ncol;
817       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
818       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
819       ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
820       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
821       ierr = MatDestroy(&B0);CHKERRQ(ierr);
822       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
823     }
824     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
825   }
826 
827   /* Matrix of coarse basis functions (local) */
828   if (pcbddc->coarse_phi_B) {
829     PetscInt on_B,on_primal,on_D=n_D;
830     if (pcbddc->coarse_phi_D) {
831       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
832     }
833     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
834     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
835       PetscScalar *marray;
836 
837       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
838       ierr = PetscFree(marray);CHKERRQ(ierr);
839       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
840       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
841       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
842       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
843     }
844   }
845 
846   if (!pcbddc->coarse_phi_B) {
847     PetscScalar *marray;
848 
849     n = n_B*pcbddc->local_primal_size;
850     if (pcbddc->switch_static || pcbddc->dbg_flag) {
851       n += n_D*pcbddc->local_primal_size;
852     }
853     if (!pcbddc->symmetric_primal) {
854       n *= 2;
855     }
856     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
857     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
858     n = n_B*pcbddc->local_primal_size;
859     if (pcbddc->switch_static || pcbddc->dbg_flag) {
860       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
861       n += n_D*pcbddc->local_primal_size;
862     }
863     if (!pcbddc->symmetric_primal) {
864       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
865       if (pcbddc->switch_static || pcbddc->dbg_flag) {
866         n = n_B*pcbddc->local_primal_size;
867         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
868       }
869     } else {
870       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
871       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
872       if (pcbddc->switch_static || pcbddc->dbg_flag) {
873         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
874         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
875       }
876     }
877   }
878 
879   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
880   /* vertices */
881   if (n_vertices) {
882 
883     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
884 
885     if (n_R) {
886       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
887       Mat          S_V_benign;
888       PetscBLASInt B_N,B_one = 1;
889       PetscScalar  *x,*y;
890       PetscBool    isseqaij;
891 
892       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
893       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
894       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
895       if (F) { /* TODO could be optimized for symmetric problems */
896         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
897       } else {
898         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
899         for (i=0;i<n_vertices;i++) {
900           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
901           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
902           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
903           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
904           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
905         }
906         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
907       }
908       if (n_benign) {
909         const PetscScalar *vals;
910         const PetscInt    *idxs;
911         PetscInt          n;
912 
913         ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr);
914         ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
915         ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
916         ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
917         ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
918         ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
920         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr);
921         ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr);
922       }
923       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
924       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
925       if (n_constraints) {
926         Mat B;
927 
928         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
929         for (i=0;i<n_vertices;i++) {
930           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
931           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
932           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
933           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
934           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
935           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
936         }
937         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
938         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
939         ierr = MatDestroy(&B);CHKERRQ(ierr);
940         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
941         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
942         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
943         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
944         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
945         ierr = MatDestroy(&B);CHKERRQ(ierr);
946       }
947       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
948       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
949         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
950       }
951       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
952       if (n_benign) {
953         ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr);
954       }
955       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
956       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
957       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
958       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
959       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
960       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
961       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
962       if (!n_benign) {
963         ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
964       } else {
965         const PetscScalar *vals;
966         PetscScalar       val;
967         const PetscInt    *idxs;
968         PetscInt          n,j;
969 
970         /* Use explicit loops to populate the local coarse matrix */
971         ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr);
972         for (j=0;j<n_vertices;j++) {
973           for (i=0;i<n_vertices;i++) {
974             coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i];
975           }
976         }
977         ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr);
978         ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr);
979         ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
980         for (j=0;j<n_vertices;j++) {
981           coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j];
982           coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j];
983         }
984         ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
985         ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr);
986         ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr);
987         ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr);
988         coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val;
989       }
990       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
991     } else {
992       /* TODO benign case also here */
993       if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented");
994       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
995     }
996     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
997     /* coarse basis functions */
998     for (i=0;i<n_vertices;i++) {
999       PetscScalar *y;
1000 
1001       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1002       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1003       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1004       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006       y[n_B*i+idx_V_B[i]] = 1.0;
1007       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1008       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1009 
1010       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1011         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1012         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1013         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1014         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1015         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1016         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1017       }
1018       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1019     }
1020     if (n_benign) {
1021       PetscScalar *y;
1022 
1023       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1024       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr);
1025       ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026       ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1028       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1029 
1030       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1031         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1032         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1033         ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034         ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1036         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1037       }
1038     }
1039     /* if n_R == 0 the object is not destroyed */
1040     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1041   }
1042 
1043   if (n_constraints) {
1044     Mat B;
1045 
1046     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1047     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1048     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1049     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1050     if (n_vertices) {
1051       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1052         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1053       } else {
1054         Mat S_VCt;
1055 
1056         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1057         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1058         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1059       }
1060     }
1061     ierr = MatDestroy(&B);CHKERRQ(ierr);
1062     /* coarse basis functions */
1063     for (i=0;i<n_constraints;i++) {
1064       PetscScalar *y;
1065 
1066       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1067       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1068       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1069       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1072       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1073       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1074         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1075         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1076         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1077         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1078         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1079         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1080       }
1081       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1082     }
1083   }
1084   if (n_constraints) {
1085     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1086   }
1087 
1088   if (n_benign) {
1089     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1090     ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1091     ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr);
1092   }
1093 
1094   /* compute other basis functions for non-symmetric problems */
1095   if (!pcbddc->symmetric_primal) {
1096 
1097     if (n_constraints) {
1098       Mat S_CCT,B_C;
1099 
1100       /* this is a lazy thing */
1101       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1102       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1103       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1104       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1105       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1106       if (n_vertices) {
1107         Mat B_V,S_VCT;
1108 
1109         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1110         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1111         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1112         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1113         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1114       }
1115       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1116     } else { /* if there are no constraints, reset work */
1117       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1118     }
1119     if (n_vertices && n_R) {
1120       Mat          A_VRT;
1121       PetscScalar  *marray;
1122       PetscBLASInt B_N,B_one = 1;
1123 
1124       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1125       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1126       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1127       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1128       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1129       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1130       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1131     }
1132 
1133     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1134       for (i=0;i<pcbddc->local_primal_size;i++) {
1135         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1136         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1137         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1138         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1139         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1140       }
1141     } else {
1142       for (i=0;i<pcbddc->local_primal_size;i++) {
1143         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1144         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1145         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1146         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1147         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1148       }
1149     }
1150     /* coarse basis functions */
1151     for (i=0;i<pcbddc->local_primal_size;i++) {
1152       PetscScalar *y;
1153 
1154       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1155       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1156       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1157       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1158       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159       if (i<n_vertices) {
1160         y[n_B*i+idx_V_B[i]] = 1.0;
1161       }
1162       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1163       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1164 
1165       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1166         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1167         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1168         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1171         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1172       }
1173       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1174     }
1175   }
1176   /* free memory */
1177   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1178   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1179   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1180   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1181   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1182   ierr = PetscFree(work);CHKERRQ(ierr);
1183   if (n_vertices) {
1184     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1185   }
1186   if (n_constraints) {
1187     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1188   }
1189   /* Checking coarse_sub_mat and coarse basis functios */
1190   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1191   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1192   //if (pcbddc->dbg_flag) {
1193   if (0) {
1194     Mat         coarse_sub_mat;
1195     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1196     Mat         coarse_phi_D,coarse_phi_B;
1197     Mat         coarse_psi_D,coarse_psi_B;
1198     Mat         A_II,A_BB,A_IB,A_BI;
1199     Mat         C_B,CPHI;
1200     IS          is_dummy;
1201     Vec         mones;
1202     MatType     checkmattype=MATSEQAIJ;
1203     PetscReal   real_value;
1204 
1205     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1206     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1207     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1208     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1209     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1210     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1211     if (unsymmetric_check) {
1212       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1213       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1214     }
1215     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1216 
1217     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1218     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1219     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1220     if (unsymmetric_check) {
1221       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1222       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1223       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1224       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1225       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1226       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1227       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1228       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1229       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1230       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1231       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1232       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1233     } else {
1234       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1235       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1236       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1237       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1238       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1239       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1240       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1241       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1242     }
1243     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1244     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1245     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1246     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1247     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1248     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1249     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1250     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1251 
1252     /* check constraints */
1253     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1254     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1255     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1256     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1257     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1258     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1259     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1260     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1261     if (unsymmetric_check) {
1262       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1263       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1264       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1265       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1266       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1267     }
1268     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1269     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1270     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1271     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1272 
1273     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1274     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1275     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1276     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1277     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1278     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1279     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1280     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1281     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1282     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1283     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1284     if (unsymmetric_check) {
1285       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1286       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1287     }
1288     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1289   }
1290   /* get back data */
1291   *coarse_submat_vals_n = coarse_submat_vals;
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1297 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1298 {
1299   Mat            *work_mat;
1300   IS             isrow_s,iscol_s;
1301   PetscBool      rsorted,csorted;
1302   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1303   PetscErrorCode ierr;
1304 
1305   PetscFunctionBegin;
1306   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1307   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1308   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1309   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1310 
1311   if (!rsorted) {
1312     const PetscInt *idxs;
1313     PetscInt *idxs_sorted,i;
1314 
1315     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1316     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1317     for (i=0;i<rsize;i++) {
1318       idxs_perm_r[i] = i;
1319     }
1320     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1321     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1322     for (i=0;i<rsize;i++) {
1323       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1324     }
1325     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1326     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1327   } else {
1328     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1329     isrow_s = isrow;
1330   }
1331 
1332   if (!csorted) {
1333     if (isrow == iscol) {
1334       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1335       iscol_s = isrow_s;
1336     } else {
1337       const PetscInt *idxs;
1338       PetscInt *idxs_sorted,i;
1339 
1340       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1341       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1342       for (i=0;i<csize;i++) {
1343         idxs_perm_c[i] = i;
1344       }
1345       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1346       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1347       for (i=0;i<csize;i++) {
1348         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1349       }
1350       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1351       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1352     }
1353   } else {
1354     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1355     iscol_s = iscol;
1356   }
1357 
1358   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1359 
1360   if (!rsorted || !csorted) {
1361     Mat      new_mat;
1362     IS       is_perm_r,is_perm_c;
1363 
1364     if (!rsorted) {
1365       PetscInt *idxs_r,i;
1366       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1367       for (i=0;i<rsize;i++) {
1368         idxs_r[idxs_perm_r[i]] = i;
1369       }
1370       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1371       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1372     } else {
1373       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1374     }
1375     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1376 
1377     if (!csorted) {
1378       if (isrow_s == iscol_s) {
1379         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1380         is_perm_c = is_perm_r;
1381       } else {
1382         PetscInt *idxs_c,i;
1383         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1384         for (i=0;i<csize;i++) {
1385           idxs_c[idxs_perm_c[i]] = i;
1386         }
1387         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1388         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1389       }
1390     } else {
1391       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1392     }
1393     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1394 
1395     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1396     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1397     work_mat[0] = new_mat;
1398     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1399     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1400   }
1401 
1402   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1403   *B = work_mat[0];
1404   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1405   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1406   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1412 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1413 {
1414   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1415   PC_IS*         pcis = (PC_IS*)pc->data;
1416   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1417   Mat            new_mat;
1418   IS             is_local,is_global;
1419   PetscInt       local_size;
1420   PetscBool      isseqaij;
1421   PetscErrorCode ierr;
1422 
1423   PetscFunctionBegin;
1424   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1425   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1426   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1427   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1428   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1429   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1430   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1431 
1432   /* check */
1433   if (pcbddc->dbg_flag) {
1434     Vec       x,x_change;
1435     PetscReal error;
1436 
1437     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1438     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1439     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1440     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1443     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1444     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1445     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1446     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1447     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1448     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1449     ierr = VecDestroy(&x);CHKERRQ(ierr);
1450     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1451   }
1452 
1453   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1454   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1455   if (isseqaij) {
1456     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1457   } else {
1458     Mat work_mat;
1459     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1460     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1461     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1462   }
1463   if (matis->A->symmetric_set) {
1464     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1465 #if !defined(PETSC_USE_COMPLEX)
1466     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1467 #endif
1468   }
1469   /*
1470   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1471   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1472   */
1473   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1479 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1480 {
1481   PC_IS*          pcis = (PC_IS*)(pc->data);
1482   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1483   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1484   PetscInt        *idx_R_local=NULL;
1485   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1486   PetscInt        vbs,bs;
1487   PetscBT         bitmask=NULL;
1488   PetscErrorCode  ierr;
1489 
1490   PetscFunctionBegin;
1491   /*
1492     No need to setup local scatters if
1493       - primal space is unchanged
1494         AND
1495       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1496         AND
1497       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1498   */
1499   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1500     PetscFunctionReturn(0);
1501   }
1502   /* destroy old objects */
1503   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1504   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1505   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1506   /* Set Non-overlapping dimensions */
1507   n_B = pcis->n_B;
1508   n_D = pcis->n - n_B;
1509   n_vertices = pcbddc->n_vertices;
1510 
1511   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1512 
1513   /* create auxiliary bitmask and allocate workspace */
1514   if (!sub_schurs->reuse_mumps) {
1515     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1516     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1517     for (i=0;i<n_vertices;i++) {
1518       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1519     }
1520 
1521     for (i=0, n_R=0; i<pcis->n; i++) {
1522       if (!PetscBTLookup(bitmask,i)) {
1523         idx_R_local[n_R++] = i;
1524       }
1525     }
1526   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1527     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1528 
1529     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1530     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1531   }
1532 
1533   /* Block code */
1534   vbs = 1;
1535   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1536   if (bs>1 && !(n_vertices%bs)) {
1537     PetscBool is_blocked = PETSC_TRUE;
1538     PetscInt  *vary;
1539     if (!sub_schurs->reuse_mumps) {
1540       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1541       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1542       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1543       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1544       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1545       for (i=0; i<pcis->n/bs; i++) {
1546         if (vary[i]!=0 && vary[i]!=bs) {
1547           is_blocked = PETSC_FALSE;
1548           break;
1549         }
1550       }
1551       ierr = PetscFree(vary);CHKERRQ(ierr);
1552     } else {
1553       /* Verify directly the R set */
1554       for (i=0; i<n_R/bs; i++) {
1555         PetscInt j,node=idx_R_local[bs*i];
1556         for (j=1; j<bs; j++) {
1557           if (node != idx_R_local[bs*i+j]-j) {
1558             is_blocked = PETSC_FALSE;
1559             break;
1560           }
1561         }
1562       }
1563     }
1564     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1565       vbs = bs;
1566       for (i=0;i<n_R/vbs;i++) {
1567         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1568       }
1569     }
1570   }
1571   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1572   if (sub_schurs->reuse_mumps) {
1573     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1574 
1575     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1576     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1577     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1578     reuse_mumps->is_R = pcbddc->is_R_local;
1579   } else {
1580     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1581   }
1582 
1583   /* print some info if requested */
1584   if (pcbddc->dbg_flag) {
1585     PetscInt benign;
1586 
1587     benign = (PetscInt)(!!pcbddc->zerodiag);
1588     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1589     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1590     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1591     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1592     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1593     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1594     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1595   }
1596 
1597   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1598   if (!sub_schurs->reuse_mumps) {
1599     IS       is_aux1,is_aux2;
1600     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1601 
1602     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1603     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1604     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1605     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1606     for (i=0; i<n_D; i++) {
1607       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1608     }
1609     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1610     for (i=0, j=0; i<n_R; i++) {
1611       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1612         aux_array1[j++] = i;
1613       }
1614     }
1615     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1616     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1617     for (i=0, j=0; i<n_B; i++) {
1618       if (!PetscBTLookup(bitmask,is_indices[i])) {
1619         aux_array2[j++] = i;
1620       }
1621     }
1622     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1623     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1624     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1625     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1626     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1627 
1628     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1629       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1630       for (i=0, j=0; i<n_R; i++) {
1631         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1632           aux_array1[j++] = i;
1633         }
1634       }
1635       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1636       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1637       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1638     }
1639     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1640     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1641   } else {
1642     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1643     IS               tis;
1644     PetscInt         schur_size;
1645 
1646     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1647     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1648     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1649     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1650     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1651       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1652       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1653       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1654     }
1655   }
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1662 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1663 {
1664   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1665   PC_IS          *pcis = (PC_IS*)pc->data;
1666   PC             pc_temp;
1667   Mat            A_RR;
1668   MatReuse       reuse;
1669   PetscScalar    m_one = -1.0;
1670   PetscReal      value;
1671   PetscInt       n_D,n_R;
1672   PetscBool      use_exact,use_exact_reduced,issbaij;
1673   PetscErrorCode ierr;
1674   /* prefixes stuff */
1675   char           dir_prefix[256],neu_prefix[256],str_level[16];
1676   size_t         len;
1677 
1678   PetscFunctionBegin;
1679 
1680   /* compute prefixes */
1681   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1682   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1683   if (!pcbddc->current_level) {
1684     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1685     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1686     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1687     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1688   } else {
1689     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1690     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1691     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1692     len -= 15; /* remove "pc_bddc_coarse_" */
1693     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1694     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1695     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1696     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1697     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1698     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1699     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1700     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1701   }
1702 
1703   /* DIRICHLET PROBLEM */
1704   if (dirichlet) {
1705     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1706     if (pcbddc->local_mat->symmetric_set) {
1707       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1708     }
1709     /* Matrix for Dirichlet problem is pcis->A_II */
1710     n_D = pcis->n - pcis->n_B;
1711     if (!pcbddc->ksp_D) { /* create object if not yet build */
1712       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1713       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1714       /* default */
1715       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1716       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1717       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1718       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1719       if (issbaij) {
1720         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1721       } else {
1722         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1723       }
1724       /* Allow user's customization */
1725       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1726       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1727     }
1728     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1729     if (sub_schurs->reuse_mumps) {
1730       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1731 
1732       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1733     }
1734     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1735     if (!n_D) {
1736       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1737       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1738     }
1739     /* Set Up KSP for Dirichlet problem of BDDC */
1740     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1741     /* set ksp_D into pcis data */
1742     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1743     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1744     pcis->ksp_D = pcbddc->ksp_D;
1745   }
1746 
1747   /* NEUMANN PROBLEM */
1748   A_RR = 0;
1749   if (neumann) {
1750     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1751     PetscInt        ibs,mbs;
1752     PetscBool       issbaij;
1753     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1754     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1755     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1756     if (pcbddc->ksp_R) { /* already created ksp */
1757       PetscInt nn_R;
1758       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1759       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1760       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1761       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1762         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1763         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1764         reuse = MAT_INITIAL_MATRIX;
1765       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1766         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1767           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1768           reuse = MAT_INITIAL_MATRIX;
1769         } else { /* safe to reuse the matrix */
1770           reuse = MAT_REUSE_MATRIX;
1771         }
1772       }
1773       /* last check */
1774       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1775         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1776         reuse = MAT_INITIAL_MATRIX;
1777       }
1778     } else { /* first time, so we need to create the matrix */
1779       reuse = MAT_INITIAL_MATRIX;
1780     }
1781     /* extract A_RR */
1782     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1783     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1784     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1785     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1786       if (matis->A == pcbddc->local_mat) {
1787         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1788         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1789       } else {
1790         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1791       }
1792     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1793       if (matis->A == pcbddc->local_mat) {
1794         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1795         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1796       } else {
1797         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1798       }
1799     }
1800     if (!sub_schurs->reuse_mumps) {
1801       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1802       if (pcbddc->local_mat->symmetric_set) {
1803         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1804       }
1805     } else {
1806       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1807 
1808       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1809       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1810       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1811     }
1812     if (!pcbddc->ksp_R) { /* create object if not present */
1813       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1814       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1815       /* default */
1816       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1817       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1818       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1819       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1820       if (issbaij) {
1821         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1822       } else {
1823         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1824       }
1825       /* Allow user's customization */
1826       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1827       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1828     }
1829     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1830     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1831     if (!n_R) {
1832       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1833       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1834     }
1835     /* Reuse MUMPS solver if it is present */
1836     if (sub_schurs->reuse_mumps) {
1837       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1838 
1839       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1840     }
1841     /* Set Up KSP for Neumann problem of BDDC */
1842     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1843   }
1844   /* free Neumann problem's matrix */
1845   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1846 
1847   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1848   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1849     if (pcbddc->dbg_flag) {
1850       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1851       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1852       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1853     }
1854     if (dirichlet) { /* Dirichlet */
1855       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1856       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1857       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1858       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1859       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1860       /* need to be adapted? */
1861       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1862       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1863       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1864       /* print info */
1865       if (pcbddc->dbg_flag) {
1866         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1867         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1868       }
1869       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1870         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1871       }
1872     }
1873     if (neumann) { /* Neumann */
1874       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1875       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1876       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1877       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1878       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1879       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1880       /* need to be adapted? */
1881       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1882       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1883       /* print info */
1884       if (pcbddc->dbg_flag) {
1885         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1886         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1887       }
1888       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1889         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1890       }
1891     }
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1898 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1899 {
1900   PetscErrorCode  ierr;
1901   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1902   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1903 
1904   PetscFunctionBegin;
1905   if (!sub_schurs->reuse_mumps) {
1906     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1907   }
1908   if (!pcbddc->switch_static) {
1909     if (applytranspose && pcbddc->local_auxmat1) {
1910       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1911       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1912     }
1913     if (!sub_schurs->reuse_mumps) {
1914       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1915       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1916     } else {
1917       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1918 
1919       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1920       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1921     }
1922   } else {
1923     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1924     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1925     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1926     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1927     if (applytranspose && pcbddc->local_auxmat1) {
1928       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1929       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1930       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1931       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1932     }
1933   }
1934   if (!sub_schurs->reuse_mumps) {
1935     if (applytranspose) {
1936       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1937     } else {
1938       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1939     }
1940 #if defined(PETSC_HAVE_MUMPS)
1941   } else {
1942     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1943 
1944     if (applytranspose) {
1945       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1946     } else {
1947       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1948     }
1949 #endif
1950   }
1951   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1952   if (!pcbddc->switch_static) {
1953     if (!sub_schurs->reuse_mumps) {
1954       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1955       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1956     } else {
1957       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1958 
1959       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1960       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1961     }
1962     if (!applytranspose && pcbddc->local_auxmat1) {
1963       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1964       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1965     }
1966   } else {
1967     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1968     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1969     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1970     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1971     if (!applytranspose && pcbddc->local_auxmat1) {
1972       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1973       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1974     }
1975     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1976     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1977     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1978     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1979   }
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1984 #undef __FUNCT__
1985 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1986 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1987 {
1988   PetscErrorCode ierr;
1989   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1990   PC_IS*            pcis = (PC_IS*)  (pc->data);
1991   const PetscScalar zero = 0.0;
1992 
1993   PetscFunctionBegin;
1994   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1995   if (applytranspose) {
1996     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1997     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1998   } else {
1999     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2000     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2001   }
2002 
2003   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2004   if (pcbddc->benign_change) {
2005     PetscScalar *array;
2006 
2007     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2008     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2009     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2010   }
2011 
2012   /* start communications from local primal nodes to rhs of coarse solver */
2013   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2014   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2015   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2016 
2017   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2018   /* TODO remove null space when doing multilevel */
2019   if (pcbddc->coarse_ksp) {
2020     Vec rhs,sol;
2021 
2022     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2023     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2024     if (applytranspose) {
2025       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2026     } else {
2027       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2028     }
2029   }
2030 
2031   /* Local solution on R nodes */
2032   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2033     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2034   }
2035 
2036   /* communications from coarse sol to local primal nodes */
2037   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2038   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2039 
2040   /* Sum contributions from two levels */
2041   if (applytranspose) {
2042     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2043     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2044   } else {
2045     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2046     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2047   }
2048   /* store p0 */
2049   if (pcbddc->benign_change) {
2050     PetscScalar *array;
2051 
2052     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2053     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2054     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2055   }
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2061 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2062 {
2063   PetscErrorCode ierr;
2064   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2065   PetscScalar    *array;
2066   Vec            from,to;
2067 
2068   PetscFunctionBegin;
2069   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2070     from = pcbddc->coarse_vec;
2071     to = pcbddc->vec1_P;
2072     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2073       Vec tvec;
2074 
2075       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2076       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2077       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2078       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2079       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2080       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2081     }
2082   } else { /* from local to global -> put data in coarse right hand side */
2083     from = pcbddc->vec1_P;
2084     to = pcbddc->coarse_vec;
2085   }
2086   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2093 {
2094   PetscErrorCode ierr;
2095   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2096   PetscScalar    *array;
2097   Vec            from,to;
2098 
2099   PetscFunctionBegin;
2100   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2101     from = pcbddc->coarse_vec;
2102     to = pcbddc->vec1_P;
2103   } else { /* from local to global -> put data in coarse right hand side */
2104     from = pcbddc->vec1_P;
2105     to = pcbddc->coarse_vec;
2106   }
2107   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2108   if (smode == SCATTER_FORWARD) {
2109     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2110       Vec tvec;
2111 
2112       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2113       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2114       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2115       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2116     }
2117   } else {
2118     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2119      ierr = VecResetArray(from);CHKERRQ(ierr);
2120     }
2121   }
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 /* uncomment for testing purposes */
2126 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2127 #undef __FUNCT__
2128 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2129 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2130 {
2131   PetscErrorCode    ierr;
2132   PC_IS*            pcis = (PC_IS*)(pc->data);
2133   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2134   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2135   /* one and zero */
2136   PetscScalar       one=1.0,zero=0.0;
2137   /* space to store constraints and their local indices */
2138   PetscScalar       *constraints_data;
2139   PetscInt          *constraints_idxs,*constraints_idxs_B;
2140   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2141   PetscInt          *constraints_n;
2142   /* iterators */
2143   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2144   /* BLAS integers */
2145   PetscBLASInt      lwork,lierr;
2146   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2147   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2148   /* reuse */
2149   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2150   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2151   /* change of basis */
2152   PetscBool         qr_needed;
2153   PetscBT           change_basis,qr_needed_idx;
2154   /* auxiliary stuff */
2155   PetscInt          *nnz,*is_indices;
2156   PetscInt          ncc;
2157   /* some quantities */
2158   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2159   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2160 
2161   PetscFunctionBegin;
2162   /* Destroy Mat objects computed previously */
2163   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2164   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2165   /* save info on constraints from previous setup (if any) */
2166   olocal_primal_size = pcbddc->local_primal_size;
2167   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2168   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2169   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2170   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2171   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2172   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2173 
2174   /* print some info */
2175   if (pcbddc->dbg_flag) {
2176     IS       vertices;
2177     PetscInt nv,nedges,nfaces;
2178     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2179     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2180     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2181     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2182     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2183     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2184     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2185     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2186     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2187   }
2188 
2189   if (!pcbddc->adaptive_selection) {
2190     IS           ISForVertices,*ISForFaces,*ISForEdges;
2191     MatNullSpace nearnullsp;
2192     const Vec    *nearnullvecs;
2193     Vec          *localnearnullsp;
2194     PetscScalar  *array;
2195     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2196     PetscBool    nnsp_has_cnst;
2197     /* LAPACK working arrays for SVD or POD */
2198     PetscBool    skip_lapack,boolforchange;
2199     PetscScalar  *work;
2200     PetscReal    *singular_vals;
2201 #if defined(PETSC_USE_COMPLEX)
2202     PetscReal    *rwork;
2203 #endif
2204 #if defined(PETSC_MISSING_LAPACK_GESVD)
2205     PetscScalar  *temp_basis,*correlation_mat;
2206 #else
2207     PetscBLASInt dummy_int=1;
2208     PetscScalar  dummy_scalar=1.;
2209 #endif
2210 
2211     /* Get index sets for faces, edges and vertices from graph */
2212     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2213     /* free unneeded index sets */
2214     if (!pcbddc->use_vertices) {
2215       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2216     }
2217     if (!pcbddc->use_edges) {
2218       for (i=0;i<n_ISForEdges;i++) {
2219         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2220       }
2221       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2222       n_ISForEdges = 0;
2223     }
2224     if (!pcbddc->use_faces) {
2225       for (i=0;i<n_ISForFaces;i++) {
2226         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2227       }
2228       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2229       n_ISForFaces = 0;
2230     }
2231 
2232 #if defined(PETSC_USE_DEBUG)
2233     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2234        Also use_change_of_basis should be consistent among processors */
2235     if (pcbddc->NullSpace) {
2236       PetscBool tbool[2],gbool[2];
2237 
2238       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2239         pcbddc->use_change_of_basis = PETSC_TRUE;
2240         if (!ISForEdges) {
2241           pcbddc->use_change_on_faces = PETSC_TRUE;
2242         }
2243       }
2244       tbool[0] = pcbddc->use_change_of_basis;
2245       tbool[1] = pcbddc->use_change_on_faces;
2246       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2247       pcbddc->use_change_of_basis = gbool[0];
2248       pcbddc->use_change_on_faces = gbool[1];
2249     }
2250 #endif
2251 
2252     /* check if near null space is attached to global mat */
2253     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2254     if (nearnullsp) {
2255       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2256       /* remove any stored info */
2257       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2258       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2259       /* store information for BDDC solver reuse */
2260       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2261       pcbddc->onearnullspace = nearnullsp;
2262       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2263       for (i=0;i<nnsp_size;i++) {
2264         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2265       }
2266     } else { /* if near null space is not provided BDDC uses constants by default */
2267       nnsp_size = 0;
2268       nnsp_has_cnst = PETSC_TRUE;
2269     }
2270     /* get max number of constraints on a single cc */
2271     max_constraints = nnsp_size;
2272     if (nnsp_has_cnst) max_constraints++;
2273 
2274     /*
2275          Evaluate maximum storage size needed by the procedure
2276          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2277          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2278          There can be multiple constraints per connected component
2279                                                                                                                                                            */
2280     n_vertices = 0;
2281     if (ISForVertices) {
2282       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2283     }
2284     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2285     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2286 
2287     total_counts = n_ISForFaces+n_ISForEdges;
2288     total_counts *= max_constraints;
2289     total_counts += n_vertices;
2290     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2291 
2292     total_counts = 0;
2293     max_size_of_constraint = 0;
2294     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2295       IS used_is;
2296       if (i<n_ISForEdges) {
2297         used_is = ISForEdges[i];
2298       } else {
2299         used_is = ISForFaces[i-n_ISForEdges];
2300       }
2301       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2302       total_counts += j;
2303       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2304     }
2305     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2306 
2307     /* get local part of global near null space vectors */
2308     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2309     for (k=0;k<nnsp_size;k++) {
2310       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2311       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2312       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2313     }
2314 
2315     /* whether or not to skip lapack calls */
2316     skip_lapack = PETSC_TRUE;
2317     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2318 
2319     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2320     if (!skip_lapack) {
2321       PetscScalar temp_work;
2322 
2323 #if defined(PETSC_MISSING_LAPACK_GESVD)
2324       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2325       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2326       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2327       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2328 #if defined(PETSC_USE_COMPLEX)
2329       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2330 #endif
2331       /* now we evaluate the optimal workspace using query with lwork=-1 */
2332       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2333       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2334       lwork = -1;
2335       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2336 #if !defined(PETSC_USE_COMPLEX)
2337       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2338 #else
2339       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2340 #endif
2341       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2342       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2343 #else /* on missing GESVD */
2344       /* SVD */
2345       PetscInt max_n,min_n;
2346       max_n = max_size_of_constraint;
2347       min_n = max_constraints;
2348       if (max_size_of_constraint < max_constraints) {
2349         min_n = max_size_of_constraint;
2350         max_n = max_constraints;
2351       }
2352       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2353 #if defined(PETSC_USE_COMPLEX)
2354       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2355 #endif
2356       /* now we evaluate the optimal workspace using query with lwork=-1 */
2357       lwork = -1;
2358       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2359       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2360       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2361       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2362 #if !defined(PETSC_USE_COMPLEX)
2363       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2364 #else
2365       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2366 #endif
2367       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2368       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2369 #endif /* on missing GESVD */
2370       /* Allocate optimal workspace */
2371       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2372       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2373     }
2374     /* Now we can loop on constraining sets */
2375     total_counts = 0;
2376     constraints_idxs_ptr[0] = 0;
2377     constraints_data_ptr[0] = 0;
2378     /* vertices */
2379     if (n_vertices) {
2380       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2381       if (nnsp_has_cnst) { /* it considers all possible vertices */
2382         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2383         for (i=0;i<n_vertices;i++) {
2384           constraints_n[total_counts] = 1;
2385           constraints_data[total_counts] = 1.0;
2386           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2387           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2388           total_counts++;
2389         }
2390       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2391         PetscBool used_vertex;
2392         for (i=0;i<n_vertices;i++) {
2393           used_vertex = PETSC_FALSE;
2394           k = 0;
2395           while (!used_vertex && k<nnsp_size) {
2396             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2397             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2398               constraints_n[total_counts] = 1;
2399               constraints_idxs[total_counts] = is_indices[i];
2400               constraints_data[total_counts] = 1.0;
2401               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2402               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2403               total_counts++;
2404               used_vertex = PETSC_TRUE;
2405             }
2406             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2407             k++;
2408           }
2409         }
2410       }
2411       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2412       n_vertices = total_counts;
2413     }
2414 
2415     /* edges and faces */
2416     total_counts_cc = total_counts;
2417     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2418       IS        used_is;
2419       PetscBool idxs_copied = PETSC_FALSE;
2420 
2421       if (ncc<n_ISForEdges) {
2422         used_is = ISForEdges[ncc];
2423         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2424       } else {
2425         used_is = ISForFaces[ncc-n_ISForEdges];
2426         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2427       }
2428       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2429 
2430       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2431       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2432       /* change of basis should not be performed on local periodic nodes */
2433       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2434       if (nnsp_has_cnst) {
2435         PetscScalar quad_value;
2436 
2437         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2438         idxs_copied = PETSC_TRUE;
2439 
2440         if (!pcbddc->use_nnsp_true) {
2441           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2442         } else {
2443           quad_value = 1.0;
2444         }
2445         for (j=0;j<size_of_constraint;j++) {
2446           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2447         }
2448         temp_constraints++;
2449         total_counts++;
2450       }
2451       for (k=0;k<nnsp_size;k++) {
2452         PetscReal real_value;
2453         PetscScalar *ptr_to_data;
2454 
2455         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2456         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2457         for (j=0;j<size_of_constraint;j++) {
2458           ptr_to_data[j] = array[is_indices[j]];
2459         }
2460         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2461         /* check if array is null on the connected component */
2462         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2463         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2464         if (real_value > 0.0) { /* keep indices and values */
2465           temp_constraints++;
2466           total_counts++;
2467           if (!idxs_copied) {
2468             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2469             idxs_copied = PETSC_TRUE;
2470           }
2471         }
2472       }
2473       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2474       valid_constraints = temp_constraints;
2475       if (!pcbddc->use_nnsp_true && temp_constraints) {
2476         if (temp_constraints == 1) { /* just normalize the constraint */
2477           PetscScalar norm,*ptr_to_data;
2478 
2479           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2480           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2481           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2482           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2483           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2484         } else { /* perform SVD */
2485           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2486           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2487 
2488 #if defined(PETSC_MISSING_LAPACK_GESVD)
2489           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2490              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2491              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2492                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2493                 from that computed using LAPACKgesvd
2494              -> This is due to a different computation of eigenvectors in LAPACKheev
2495              -> The quality of the POD-computed basis will be the same */
2496           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2497           /* Store upper triangular part of correlation matrix */
2498           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2499           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2500           for (j=0;j<temp_constraints;j++) {
2501             for (k=0;k<j+1;k++) {
2502               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2503             }
2504           }
2505           /* compute eigenvalues and eigenvectors of correlation matrix */
2506           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2507           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2508 #if !defined(PETSC_USE_COMPLEX)
2509           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2510 #else
2511           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2512 #endif
2513           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2514           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2515           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2516           j = 0;
2517           while (j < temp_constraints && singular_vals[j] < tol) j++;
2518           total_counts = total_counts-j;
2519           valid_constraints = temp_constraints-j;
2520           /* scale and copy POD basis into used quadrature memory */
2521           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2522           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2523           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2527           if (j<temp_constraints) {
2528             PetscInt ii;
2529             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2530             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2531             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2532             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2533             for (k=0;k<temp_constraints-j;k++) {
2534               for (ii=0;ii<size_of_constraint;ii++) {
2535                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2536               }
2537             }
2538           }
2539 #else  /* on missing GESVD */
2540           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2541           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2542           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2543           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2544 #if !defined(PETSC_USE_COMPLEX)
2545           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2546 #else
2547           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2548 #endif
2549           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2550           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2551           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2552           k = temp_constraints;
2553           if (k > size_of_constraint) k = size_of_constraint;
2554           j = 0;
2555           while (j < k && singular_vals[k-j-1] < tol) j++;
2556           valid_constraints = k-j;
2557           total_counts = total_counts-temp_constraints+valid_constraints;
2558 #endif /* on missing GESVD */
2559         }
2560       }
2561       /* update pointers information */
2562       if (valid_constraints) {
2563         constraints_n[total_counts_cc] = valid_constraints;
2564         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2565         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2566         /* set change_of_basis flag */
2567         if (boolforchange) {
2568           PetscBTSet(change_basis,total_counts_cc);
2569         }
2570         total_counts_cc++;
2571       }
2572     }
2573     /* free workspace */
2574     if (!skip_lapack) {
2575       ierr = PetscFree(work);CHKERRQ(ierr);
2576 #if defined(PETSC_USE_COMPLEX)
2577       ierr = PetscFree(rwork);CHKERRQ(ierr);
2578 #endif
2579       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2580 #if defined(PETSC_MISSING_LAPACK_GESVD)
2581       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2582       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2583 #endif
2584     }
2585     for (k=0;k<nnsp_size;k++) {
2586       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2587     }
2588     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2589     /* free index sets of faces, edges and vertices */
2590     for (i=0;i<n_ISForFaces;i++) {
2591       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2592     }
2593     if (n_ISForFaces) {
2594       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2595     }
2596     for (i=0;i<n_ISForEdges;i++) {
2597       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2598     }
2599     if (n_ISForEdges) {
2600       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2601     }
2602     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2603   } else {
2604     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2605 
2606     total_counts = 0;
2607     n_vertices = 0;
2608     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2609       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2610     }
2611     max_constraints = 0;
2612     total_counts_cc = 0;
2613     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2614       total_counts += pcbddc->adaptive_constraints_n[i];
2615       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2616       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2617     }
2618     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2619     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2620     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2621     constraints_data = pcbddc->adaptive_constraints_data;
2622     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2623     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2624     total_counts_cc = 0;
2625     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2626       if (pcbddc->adaptive_constraints_n[i]) {
2627         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2628       }
2629     }
2630 #if 0
2631     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2632     for (i=0;i<total_counts_cc;i++) {
2633       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2634       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2635       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2636         printf(" %d",constraints_idxs[j]);
2637       }
2638       printf("\n");
2639       printf("number of cc: %d\n",constraints_n[i]);
2640     }
2641     for (i=0;i<n_vertices;i++) {
2642       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2643     }
2644     for (i=0;i<sub_schurs->n_subs;i++) {
2645       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2646     }
2647 #endif
2648 
2649     max_size_of_constraint = 0;
2650     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2651     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2652     /* Change of basis */
2653     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2654     if (pcbddc->use_change_of_basis) {
2655       for (i=0;i<sub_schurs->n_subs;i++) {
2656         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2657           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2658         }
2659       }
2660     }
2661   }
2662   pcbddc->local_primal_size = total_counts;
2663   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2664   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2665 
2666   /* map constraints_idxs in boundary numbering */
2667   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2668   if (i != constraints_idxs_ptr[total_counts_cc]) {
2669     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2670   }
2671 
2672   /* Create constraint matrix */
2673   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2674   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2675   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2676 
2677   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2678   /* determine if a QR strategy is needed for change of basis */
2679   qr_needed = PETSC_FALSE;
2680   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2681   total_primal_vertices=0;
2682   pcbddc->local_primal_size_cc = 0;
2683   for (i=0;i<total_counts_cc;i++) {
2684     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2685     if (size_of_constraint == 1) {
2686       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2687       pcbddc->local_primal_size_cc += 1;
2688     } else if (PetscBTLookup(change_basis,i)) {
2689       for (k=0;k<constraints_n[i];k++) {
2690         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2691       }
2692       pcbddc->local_primal_size_cc += constraints_n[i];
2693       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2694         PetscBTSet(qr_needed_idx,i);
2695         qr_needed = PETSC_TRUE;
2696       }
2697     } else {
2698       pcbddc->local_primal_size_cc += 1;
2699     }
2700   }
2701   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2702   pcbddc->n_vertices = total_primal_vertices;
2703   /* permute indices in order to have a sorted set of vertices */
2704   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2705 
2706   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2707   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2708   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2709   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2710 
2711   /* nonzero structure of constraint matrix */
2712   /* and get reference dof for local constraints */
2713   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2714   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2715 
2716   j = total_primal_vertices;
2717   total_counts = total_primal_vertices;
2718   cum = total_primal_vertices;
2719   for (i=n_vertices;i<total_counts_cc;i++) {
2720     if (!PetscBTLookup(change_basis,i)) {
2721       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2722       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2723       cum++;
2724       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2725       for (k=0;k<constraints_n[i];k++) {
2726         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2727         nnz[j+k] = size_of_constraint;
2728       }
2729       j += constraints_n[i];
2730     }
2731   }
2732   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2733   ierr = PetscFree(nnz);CHKERRQ(ierr);
2734 
2735   /* set values in constraint matrix */
2736   for (i=0;i<total_primal_vertices;i++) {
2737     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2738   }
2739   total_counts = total_primal_vertices;
2740   for (i=n_vertices;i<total_counts_cc;i++) {
2741     if (!PetscBTLookup(change_basis,i)) {
2742       PetscInt *cols;
2743 
2744       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2745       cols = constraints_idxs+constraints_idxs_ptr[i];
2746       for (k=0;k<constraints_n[i];k++) {
2747         PetscInt    row = total_counts+k;
2748         PetscScalar *vals;
2749 
2750         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2751         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2752       }
2753       total_counts += constraints_n[i];
2754     }
2755   }
2756   /* assembling */
2757   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759 
2760   /*
2761   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2762   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2763   */
2764   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2765   if (pcbddc->use_change_of_basis) {
2766     /* dual and primal dofs on a single cc */
2767     PetscInt     dual_dofs,primal_dofs;
2768     /* working stuff for GEQRF */
2769     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2770     PetscBLASInt lqr_work;
2771     /* working stuff for UNGQR */
2772     PetscScalar  *gqr_work,lgqr_work_t;
2773     PetscBLASInt lgqr_work;
2774     /* working stuff for TRTRS */
2775     PetscScalar  *trs_rhs;
2776     PetscBLASInt Blas_NRHS;
2777     /* pointers for values insertion into change of basis matrix */
2778     PetscInt     *start_rows,*start_cols;
2779     PetscScalar  *start_vals;
2780     /* working stuff for values insertion */
2781     PetscBT      is_primal;
2782     PetscInt     *aux_primal_numbering_B;
2783     /* matrix sizes */
2784     PetscInt     global_size,local_size;
2785     /* temporary change of basis */
2786     Mat          localChangeOfBasisMatrix;
2787     /* extra space for debugging */
2788     PetscScalar  *dbg_work;
2789 
2790     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2791     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2792     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2793     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2794     /* nonzeros for local mat */
2795     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2796     for (i=0;i<pcis->n;i++) nnz[i]=1;
2797     for (i=n_vertices;i<total_counts_cc;i++) {
2798       if (PetscBTLookup(change_basis,i)) {
2799         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2800         if (PetscBTLookup(qr_needed_idx,i)) {
2801           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2802         } else {
2803           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2804           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2805         }
2806       }
2807     }
2808     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2809     ierr = PetscFree(nnz);CHKERRQ(ierr);
2810     /* Set initial identity in the matrix */
2811     for (i=0;i<pcis->n;i++) {
2812       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2813     }
2814 
2815     if (pcbddc->dbg_flag) {
2816       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2817       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2818     }
2819 
2820 
2821     /* Now we loop on the constraints which need a change of basis */
2822     /*
2823        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2824        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2825 
2826        Basic blocks of change of basis matrix T computed by
2827 
2828           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2829 
2830             | 1        0   ...        0         s_1/S |
2831             | 0        1   ...        0         s_2/S |
2832             |              ...                        |
2833             | 0        ...            1     s_{n-1}/S |
2834             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2835 
2836             with S = \sum_{i=1}^n s_i^2
2837             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2838                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2839 
2840           - QR decomposition of constraints otherwise
2841     */
2842     if (qr_needed) {
2843       /* space to store Q */
2844       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2845       /* first we issue queries for optimal work */
2846       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2847       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2848       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2849       lqr_work = -1;
2850       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2851       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2852       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2853       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2854       lgqr_work = -1;
2855       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2856       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2857       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2858       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2859       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2860       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2861       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2862       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2863       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2864       /* array to store scaling factors for reflectors */
2865       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2866       /* array to store rhs and solution of triangular solver */
2867       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2868       /* allocating workspace for check */
2869       if (pcbddc->dbg_flag) {
2870         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2871       }
2872     }
2873     /* array to store whether a node is primal or not */
2874     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2875     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2876     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2877     if (i != total_primal_vertices) {
2878       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2879     }
2880     for (i=0;i<total_primal_vertices;i++) {
2881       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2882     }
2883     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2884 
2885     /* loop on constraints and see whether or not they need a change of basis and compute it */
2886     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2887       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2888       if (PetscBTLookup(change_basis,total_counts)) {
2889         /* get constraint info */
2890         primal_dofs = constraints_n[total_counts];
2891         dual_dofs = size_of_constraint-primal_dofs;
2892 
2893         if (pcbddc->dbg_flag) {
2894           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2895         }
2896 
2897         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2898 
2899           /* copy quadrature constraints for change of basis check */
2900           if (pcbddc->dbg_flag) {
2901             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2902           }
2903           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2904           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2905 
2906           /* compute QR decomposition of constraints */
2907           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2908           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2909           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2910           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2911           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2912           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2913           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2914 
2915           /* explictly compute R^-T */
2916           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2917           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2918           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2919           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2920           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2921           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2922           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2923           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2924           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2925           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2926 
2927           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2928           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2929           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2930           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2931           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2932           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2933           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2934           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2935           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2936 
2937           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2938              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2939              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2940           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2941           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2942           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2943           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2944           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2946           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2947           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2948           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2949           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2950 
2951           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2952           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2953           /* insert cols for primal dofs */
2954           for (j=0;j<primal_dofs;j++) {
2955             start_vals = &qr_basis[j*size_of_constraint];
2956             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2957             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2958           }
2959           /* insert cols for dual dofs */
2960           for (j=0,k=0;j<dual_dofs;k++) {
2961             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2962               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2963               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2964               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2965               j++;
2966             }
2967           }
2968 
2969           /* check change of basis */
2970           if (pcbddc->dbg_flag) {
2971             PetscInt   ii,jj;
2972             PetscBool valid_qr=PETSC_TRUE;
2973             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2974             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2975             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2976             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2977             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2978             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2979             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2980             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2981             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2982             for (jj=0;jj<size_of_constraint;jj++) {
2983               for (ii=0;ii<primal_dofs;ii++) {
2984                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2985                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2986               }
2987             }
2988             if (!valid_qr) {
2989               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2990               for (jj=0;jj<size_of_constraint;jj++) {
2991                 for (ii=0;ii<primal_dofs;ii++) {
2992                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2993                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2994                   }
2995                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2996                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2997                   }
2998                 }
2999               }
3000             } else {
3001               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3002             }
3003           }
3004         } else { /* simple transformation block */
3005           PetscInt    row,col;
3006           PetscScalar val,norm;
3007 
3008           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3009           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3010           for (j=0;j<size_of_constraint;j++) {
3011             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3012             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3013             if (!PetscBTLookup(is_primal,row_B)) {
3014               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3015               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3016               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3017             } else {
3018               for (k=0;k<size_of_constraint;k++) {
3019                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3020                 if (row != col) {
3021                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3022                 } else {
3023                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3024                 }
3025                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3026               }
3027             }
3028           }
3029           if (pcbddc->dbg_flag) {
3030             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3031           }
3032         }
3033       } else {
3034         if (pcbddc->dbg_flag) {
3035           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3036         }
3037       }
3038     }
3039 
3040     /* free workspace */
3041     if (qr_needed) {
3042       if (pcbddc->dbg_flag) {
3043         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3044       }
3045       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3046       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3047       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3048       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3049       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3050     }
3051     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3052     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3053     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054 
3055     /* assembling of global change of variable */
3056     {
3057       Mat      tmat;
3058       PetscInt bs;
3059 
3060       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3061       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3062       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3063       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3064       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3065       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3066       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3067       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3068       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3069       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3070       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3071       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3072       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3073       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3074       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3075       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3076       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3077       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3078     }
3079     /* check */
3080     if (pcbddc->dbg_flag) {
3081       PetscReal error;
3082       Vec       x,x_change;
3083 
3084       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3085       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3086       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3087       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3088       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3089       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3090       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3091       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3092       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3093       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3094       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3095       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3096       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3097       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3098       ierr = VecDestroy(&x);CHKERRQ(ierr);
3099       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3100     }
3101 
3102     /* adapt sub_schurs computed (if any) */
3103     if (pcbddc->use_deluxe_scaling) {
3104       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3105       if (sub_schurs->S_Ej_all) {
3106         Mat S_new,tmat;
3107         IS is_all_N;
3108 
3109         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3110         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3111         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3112         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3113         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3114         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3115         sub_schurs->S_Ej_all = S_new;
3116         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3117         if (sub_schurs->sum_S_Ej_all) {
3118           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3119           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3120           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3121           sub_schurs->sum_S_Ej_all = S_new;
3122           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3123         }
3124         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3125       }
3126     }
3127     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3128   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3129     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3130     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3131   }
3132 
3133   /* set up change of basis context */
3134   if (pcbddc->ChangeOfBasisMatrix) {
3135     PCBDDCChange_ctx change_ctx;
3136 
3137     if (!pcbddc->new_global_mat) {
3138       PetscInt global_size,local_size;
3139 
3140       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3141       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3142       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3143       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3144       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3145       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3146       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3147       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3148       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3149     } else {
3150       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3151       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3152       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3153     }
3154     if (!pcbddc->user_ChangeOfBasisMatrix) {
3155       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3156       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3157     } else {
3158       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3159       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3160     }
3161     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3162     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3163     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   }
3166 
3167   /* add pressure dof to set of primal nodes for numbering purposes */
3168   if (pcbddc->zerodiag) {
3169     const PetscInt *idxs;
3170     PetscInt       nz;
3171 
3172     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
3173     ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3174     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1];
3175     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1];
3176     ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3177     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3178     pcbddc->local_primal_size_cc++;
3179     pcbddc->local_primal_size++;
3180   }
3181 
3182   /* check if a new primal space has been introduced (also take into account benign trick) */
3183   pcbddc->new_primal_space_local = PETSC_TRUE;
3184   if (olocal_primal_size == pcbddc->local_primal_size) {
3185     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3186     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3187     if (!pcbddc->new_primal_space_local) {
3188       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3189       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3190     }
3191   }
3192   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3193   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3194   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3195 
3196   /* flush dbg viewer */
3197   if (pcbddc->dbg_flag) {
3198     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3199   }
3200 
3201   /* free workspace */
3202   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3203   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3204   if (!pcbddc->adaptive_selection) {
3205     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3206     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3207   } else {
3208     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3209                       pcbddc->adaptive_constraints_idxs_ptr,
3210                       pcbddc->adaptive_constraints_data_ptr,
3211                       pcbddc->adaptive_constraints_idxs,
3212                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3213     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3214     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3215   }
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 #undef __FUNCT__
3220 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3221 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3222 {
3223   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3224   PC_IS       *pcis = (PC_IS*)pc->data;
3225   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3226   PetscInt    ierr,i,vertex_size,N;
3227   PetscViewer viewer=pcbddc->dbg_viewer;
3228 
3229   PetscFunctionBegin;
3230   /* Reset previously computed graph */
3231   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3232   /* Init local Graph struct */
3233   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3234   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3235 
3236   /* Check validity of the csr graph passed in by the user */
3237   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3238     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3239   }
3240 
3241   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3242   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3243     PetscInt  *xadj,*adjncy;
3244     PetscInt  nvtxs;
3245     PetscBool flg_row=PETSC_FALSE;
3246 
3247     if (pcbddc->use_local_adj) {
3248 
3249       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3250       if (flg_row) {
3251         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3252         pcbddc->computed_rowadj = PETSC_TRUE;
3253       }
3254       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3255     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3256       IS                     is_dummy;
3257       ISLocalToGlobalMapping l2gmap_dummy;
3258       PetscInt               j,sum;
3259       PetscInt               *cxadj,*cadjncy;
3260       const PetscInt         *idxs;
3261       PCBDDCGraph            graph;
3262       PetscBT                is_on_boundary;
3263 
3264       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3265       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3266       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3267       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3268       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3269       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3270       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3271       if (flg_row) {
3272         graph->xadj = xadj;
3273         graph->adjncy = adjncy;
3274       }
3275       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3276       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3277       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3278 
3279       if (pcbddc->dbg_flag) {
3280         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3281         for (i=0;i<graph->ncc;i++) {
3282           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3283         }
3284       }
3285 
3286       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3287       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3288       for (i=0;i<pcis->n_B;i++) {
3289         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3290       }
3291       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3292 
3293       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3294       sum = 0;
3295       for (i=0;i<graph->ncc;i++) {
3296         PetscInt sizecc = 0;
3297         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3298           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3299             sizecc++;
3300           }
3301         }
3302         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3303           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3304             cxadj[graph->queue[j]] = sizecc;
3305           }
3306         }
3307         sum += sizecc*sizecc;
3308       }
3309       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3310       sum = 0;
3311       for (i=0;i<pcis->n;i++) {
3312         PetscInt temp = cxadj[i];
3313         cxadj[i] = sum;
3314         sum += temp;
3315       }
3316       cxadj[pcis->n] = sum;
3317       for (i=0;i<graph->ncc;i++) {
3318         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3319           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3320             PetscInt k,sizecc = 0;
3321             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3322               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3323                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3324                 sizecc++;
3325               }
3326             }
3327           }
3328         }
3329       }
3330       if (sum) {
3331         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3332       } else {
3333         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3334         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3335       }
3336       graph->xadj = 0;
3337       graph->adjncy = 0;
3338       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3339       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3340     }
3341   }
3342   if (pcbddc->dbg_flag) {
3343     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3344   }
3345 
3346   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3347   vertex_size = 1;
3348   if (pcbddc->user_provided_isfordofs) {
3349     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3350       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3351       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3352         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3353         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3354       }
3355       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3356       pcbddc->n_ISForDofs = 0;
3357       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3358     }
3359     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3360     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3361   } else {
3362     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3363       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3364       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3365       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3366         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3367       }
3368     }
3369   }
3370 
3371   /* Setup of Graph */
3372   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3373     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3374   }
3375   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3376     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3377   }
3378   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3379 
3380   /* Graph's connected components analysis */
3381   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3382 
3383   /* print some info to stdout */
3384   if (pcbddc->dbg_flag) {
3385     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3386   }
3387 
3388   /* mark topography has done */
3389   pcbddc->recompute_topography = PETSC_FALSE;
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3394 #undef __FUNCT__
3395 #define __FUNCT__ "PCBDDCSubsetNumbering"
3396 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3397 {
3398   PetscSF        sf;
3399   PetscLayout    map;
3400   const PetscInt *idxs;
3401   PetscInt       *leaf_data,*root_data,*gidxs;
3402   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3403   PetscInt       n_n,nlocals,start,first_index;
3404   PetscMPIInt    commsize;
3405   PetscBool      first_found;
3406   PetscErrorCode ierr;
3407 
3408   PetscFunctionBegin;
3409   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3410   if (subset_mult) {
3411     PetscCheckSameComm(subset,1,subset_mult,2);
3412     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3413     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3414   }
3415   /* create workspace layout for computing global indices of subset */
3416   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3417   lbounds[0] = lbounds[1] = 0;
3418   for (i=0;i<n;i++) {
3419     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3420     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3421   }
3422   lbounds[0] = -lbounds[0];
3423   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3424   gbounds[0] = -gbounds[0];
3425   N = gbounds[1] - gbounds[0] + 1;
3426   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3427   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3428   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3429   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3430   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3431 
3432   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3433   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3434   if (subset_mult) {
3435     const PetscInt* idxs_mult;
3436 
3437     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3438     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3439     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3440   } else {
3441     for (i=0;i<n;i++) leaf_data[i] = 1;
3442   }
3443   /* local size of new subset */
3444   n_n = 0;
3445   for (i=0;i<n;i++) n_n += leaf_data[i];
3446 
3447   /* global indexes in layout */
3448   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3449   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3450   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3451   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3452   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3453   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3454 
3455   /* reduce from leaves to roots */
3456   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3457   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3458   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3459 
3460   /* count indexes in local part of layout */
3461   nlocals = 0;
3462   first_index = -1;
3463   first_found = PETSC_FALSE;
3464   for (i=0;i<Nl;i++) {
3465     if (!first_found && root_data[i]) {
3466       first_found = PETSC_TRUE;
3467       first_index = i;
3468     }
3469     nlocals += root_data[i];
3470   }
3471 
3472   /* cumulative of number of indexes and size of subset without holes */
3473 #if defined(PETSC_HAVE_MPI_EXSCAN)
3474   start = 0;
3475   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3476 #else
3477   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3478   start = start-nlocals;
3479 #endif
3480 
3481   if (N_n) { /* compute total size of new subset if requested */
3482     *N_n = start + nlocals;
3483     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3484     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3485   }
3486 
3487   /* adapt root data with cumulative */
3488   if (first_found) {
3489     PetscInt old_index;
3490 
3491     root_data[first_index] += start;
3492     old_index = first_index;
3493     for (i=first_index+1;i<Nl;i++) {
3494       if (root_data[i]) {
3495         root_data[i] += root_data[old_index];
3496         old_index = i;
3497       }
3498     }
3499   }
3500 
3501   /* from roots to leaves */
3502   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3503   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3504   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3505 
3506   /* create new IS with global indexes without holes */
3507   if (subset_mult) {
3508     const PetscInt* idxs_mult;
3509     PetscInt        cum;
3510 
3511     cum = 0;
3512     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3513     for (i=0;i<n;i++) {
3514       PetscInt j;
3515       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3516     }
3517     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3518   } else {
3519     for (i=0;i<n;i++) {
3520       gidxs[i] = leaf_data[i]-1;
3521     }
3522   }
3523   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3524   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3525   PetscFunctionReturn(0);
3526 }
3527 
3528 #undef __FUNCT__
3529 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3530 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3531 {
3532   PetscInt       i,j;
3533   PetscScalar    *alphas;
3534   PetscErrorCode ierr;
3535 
3536   PetscFunctionBegin;
3537   /* this implements stabilized Gram-Schmidt */
3538   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3539   for (i=0;i<n;i++) {
3540     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3541     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3542     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3543   }
3544   ierr = PetscFree(alphas);CHKERRQ(ierr);
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatISGetSubassemblingPattern"
3550 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3551 {
3552   IS             ranks_send_to;
3553   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3554   PetscMPIInt    size,rank,color;
3555   PetscInt       *xadj,*adjncy;
3556   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3557   PetscInt       i,local_size,threshold=0;
3558   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3559   PetscSubcomm   subcomm;
3560   PetscErrorCode ierr;
3561 
3562   PetscFunctionBegin;
3563   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3564   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3565   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3566 
3567   /* Get info on mapping */
3568   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3569   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3570 
3571   /* build local CSR graph of subdomains' connectivity */
3572   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3573   xadj[0] = 0;
3574   xadj[1] = PetscMax(n_neighs-1,0);
3575   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3576   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3577 
3578   if (threshold) {
3579     PetscInt xadj_count = 0;
3580     for (i=1;i<n_neighs;i++) {
3581       if (n_shared[i] > threshold) {
3582         adjncy[xadj_count] = neighs[i];
3583         adjncy_wgt[xadj_count] = n_shared[i];
3584         xadj_count++;
3585       }
3586     }
3587     xadj[1] = xadj_count;
3588   } else {
3589     if (xadj[1]) {
3590       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3591       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3592     }
3593   }
3594   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3595   if (use_square) {
3596     for (i=0;i<xadj[1];i++) {
3597       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3598     }
3599   }
3600   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3601 
3602   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3603 
3604   /*
3605     Restrict work on active processes only.
3606   */
3607   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3608   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3609   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3610   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3611   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3612   if (color) {
3613     ierr = PetscFree(xadj);CHKERRQ(ierr);
3614     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3615     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3616   } else {
3617     Mat             subdomain_adj;
3618     IS              new_ranks,new_ranks_contig;
3619     MatPartitioning partitioner;
3620     PetscInt        prank,rstart=0,rend=0;
3621     PetscInt        *is_indices,*oldranks;
3622     PetscBool       aggregate;
3623 
3624     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3625     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3626     prank = rank;
3627     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3628     /*
3629     for (i=0;i<size;i++) {
3630       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3631     }
3632     */
3633     for (i=0;i<xadj[1];i++) {
3634       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3635     }
3636     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3637     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3638     if (aggregate) {
3639       PetscInt    lrows,row,ncols,*cols;
3640       PetscMPIInt nrank;
3641       PetscScalar *vals;
3642 
3643       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3644       lrows = 0;
3645       if (nrank<redprocs) {
3646         lrows = size/redprocs;
3647         if (nrank<size%redprocs) lrows++;
3648       }
3649       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3650       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3651       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3652       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3653       row = nrank;
3654       ncols = xadj[1]-xadj[0];
3655       cols = adjncy;
3656       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3657       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3658       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3659       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3660       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661       ierr = PetscFree(xadj);CHKERRQ(ierr);
3662       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3663       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3664       ierr = PetscFree(vals);CHKERRQ(ierr);
3665     } else {
3666       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3667     }
3668     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3669 
3670     /* Partition */
3671     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3672     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3673     if (use_vwgt) {
3674       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3675       v_wgt[0] = local_size;
3676       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3677     }
3678     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3679     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3680     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3681     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3682     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3683 
3684     /* renumber new_ranks to avoid "holes" in new set of processors */
3685     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3686     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3687     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3688     if (!redprocs) {
3689       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3690     } else {
3691       PetscInt    idxs[1];
3692       PetscMPIInt tag;
3693       MPI_Request *reqs;
3694 
3695       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3696       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3697       for (i=rstart;i<rend;i++) {
3698         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3699       }
3700       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3701       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3702       ierr = PetscFree(reqs);CHKERRQ(ierr);
3703       ranks_send_to_idx[0] = oldranks[idxs[0]];
3704     }
3705     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3706     /* clean up */
3707     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3708     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3709     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3710     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3711   }
3712   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3713 
3714   /* assemble parallel IS for sends */
3715   i = 1;
3716   if (color) i=0;
3717   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3718   /* get back IS */
3719   *is_sends = ranks_send_to;
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3724 
3725 #undef __FUNCT__
3726 #define __FUNCT__ "MatISSubassemble"
3727 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3728 {
3729   Mat                    local_mat;
3730   IS                     is_sends_internal;
3731   PetscInt               rows,cols,new_local_rows;
3732   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3733   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3734   ISLocalToGlobalMapping l2gmap;
3735   PetscInt*              l2gmap_indices;
3736   const PetscInt*        is_indices;
3737   MatType                new_local_type;
3738   /* buffers */
3739   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3740   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3741   PetscInt               *recv_buffer_idxs_local;
3742   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3743   /* MPI */
3744   MPI_Comm               comm,comm_n;
3745   PetscSubcomm           subcomm;
3746   PetscMPIInt            n_sends,n_recvs,commsize;
3747   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3748   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3749   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3750   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3751   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3752   PetscErrorCode         ierr;
3753 
3754   PetscFunctionBegin;
3755   /* TODO: add missing checks */
3756   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3757   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3758   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3759   PetscValidLogicalCollectiveInt(mat,nis,7);
3760   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3761   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3762   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3763   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3764   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3765   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3766   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3767   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3768     PetscInt mrows,mcols,mnrows,mncols;
3769     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3770     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3771     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3772     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3773     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3774     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3775   }
3776   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3777   PetscValidLogicalCollectiveInt(mat,bs,0);
3778   /* prepare IS for sending if not provided */
3779   if (!is_sends) {
3780     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3781     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3782   } else {
3783     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3784     is_sends_internal = is_sends;
3785   }
3786 
3787   /* get comm */
3788   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3789 
3790   /* compute number of sends */
3791   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3792   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3793 
3794   /* compute number of receives */
3795   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3796   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3797   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3798   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3799   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3800   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3801   ierr = PetscFree(iflags);CHKERRQ(ierr);
3802 
3803   /* restrict comm if requested */
3804   subcomm = 0;
3805   destroy_mat = PETSC_FALSE;
3806   if (restrict_comm) {
3807     PetscMPIInt color,subcommsize;
3808 
3809     color = 0;
3810     if (restrict_full) {
3811       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3812     } else {
3813       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3814     }
3815     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3816     subcommsize = commsize - subcommsize;
3817     /* check if reuse has been requested */
3818     if (reuse == MAT_REUSE_MATRIX) {
3819       if (*mat_n) {
3820         PetscMPIInt subcommsize2;
3821         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3822         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3823         comm_n = PetscObjectComm((PetscObject)*mat_n);
3824       } else {
3825         comm_n = PETSC_COMM_SELF;
3826       }
3827     } else { /* MAT_INITIAL_MATRIX */
3828       PetscMPIInt rank;
3829 
3830       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3831       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3832       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3833       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3834       comm_n = PetscSubcommChild(subcomm);
3835     }
3836     /* flag to destroy *mat_n if not significative */
3837     if (color) destroy_mat = PETSC_TRUE;
3838   } else {
3839     comm_n = comm;
3840   }
3841 
3842   /* prepare send/receive buffers */
3843   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3844   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3845   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3846   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3847   if (nis) {
3848     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3849   }
3850 
3851   /* Get data from local matrices */
3852   if (!isdense) {
3853     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3854     /* TODO: See below some guidelines on how to prepare the local buffers */
3855     /*
3856        send_buffer_vals should contain the raw values of the local matrix
3857        send_buffer_idxs should contain:
3858        - MatType_PRIVATE type
3859        - PetscInt        size_of_l2gmap
3860        - PetscInt        global_row_indices[size_of_l2gmap]
3861        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3862     */
3863   } else {
3864     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3865     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3866     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3867     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3868     send_buffer_idxs[1] = i;
3869     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3870     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3871     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3872     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3873     for (i=0;i<n_sends;i++) {
3874       ilengths_vals[is_indices[i]] = len*len;
3875       ilengths_idxs[is_indices[i]] = len+2;
3876     }
3877   }
3878   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3879   /* additional is (if any) */
3880   if (nis) {
3881     PetscMPIInt psum;
3882     PetscInt j;
3883     for (j=0,psum=0;j<nis;j++) {
3884       PetscInt plen;
3885       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3886       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3887       psum += len+1; /* indices + lenght */
3888     }
3889     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3890     for (j=0,psum=0;j<nis;j++) {
3891       PetscInt plen;
3892       const PetscInt *is_array_idxs;
3893       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3894       send_buffer_idxs_is[psum] = plen;
3895       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3896       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3897       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3898       psum += plen+1; /* indices + lenght */
3899     }
3900     for (i=0;i<n_sends;i++) {
3901       ilengths_idxs_is[is_indices[i]] = psum;
3902     }
3903     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3904   }
3905 
3906   buf_size_idxs = 0;
3907   buf_size_vals = 0;
3908   buf_size_idxs_is = 0;
3909   for (i=0;i<n_recvs;i++) {
3910     buf_size_idxs += (PetscInt)olengths_idxs[i];
3911     buf_size_vals += (PetscInt)olengths_vals[i];
3912     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3913   }
3914   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3915   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3916   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3917 
3918   /* get new tags for clean communications */
3919   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3920   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3921   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3922 
3923   /* allocate for requests */
3924   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3925   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3926   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3927   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3928   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3929   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3930 
3931   /* communications */
3932   ptr_idxs = recv_buffer_idxs;
3933   ptr_vals = recv_buffer_vals;
3934   ptr_idxs_is = recv_buffer_idxs_is;
3935   for (i=0;i<n_recvs;i++) {
3936     source_dest = onodes[i];
3937     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3938     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3939     ptr_idxs += olengths_idxs[i];
3940     ptr_vals += olengths_vals[i];
3941     if (nis) {
3942       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3943       ptr_idxs_is += olengths_idxs_is[i];
3944     }
3945   }
3946   for (i=0;i<n_sends;i++) {
3947     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3948     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3949     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3950     if (nis) {
3951       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3952     }
3953   }
3954   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3955   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3956 
3957   /* assemble new l2g map */
3958   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3959   ptr_idxs = recv_buffer_idxs;
3960   new_local_rows = 0;
3961   for (i=0;i<n_recvs;i++) {
3962     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3963     ptr_idxs += olengths_idxs[i];
3964   }
3965   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3966   ptr_idxs = recv_buffer_idxs;
3967   new_local_rows = 0;
3968   for (i=0;i<n_recvs;i++) {
3969     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3970     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3971     ptr_idxs += olengths_idxs[i];
3972   }
3973   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3974   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3975   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3976 
3977   /* infer new local matrix type from received local matrices type */
3978   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3979   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3980   if (n_recvs) {
3981     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3982     ptr_idxs = recv_buffer_idxs;
3983     for (i=0;i<n_recvs;i++) {
3984       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3985         new_local_type_private = MATAIJ_PRIVATE;
3986         break;
3987       }
3988       ptr_idxs += olengths_idxs[i];
3989     }
3990     switch (new_local_type_private) {
3991       case MATDENSE_PRIVATE:
3992         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3993           new_local_type = MATSEQAIJ;
3994           bs = 1;
3995         } else { /* if I receive only 1 dense matrix */
3996           new_local_type = MATSEQDENSE;
3997           bs = 1;
3998         }
3999         break;
4000       case MATAIJ_PRIVATE:
4001         new_local_type = MATSEQAIJ;
4002         bs = 1;
4003         break;
4004       case MATBAIJ_PRIVATE:
4005         new_local_type = MATSEQBAIJ;
4006         break;
4007       case MATSBAIJ_PRIVATE:
4008         new_local_type = MATSEQSBAIJ;
4009         break;
4010       default:
4011         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4012         break;
4013     }
4014   } else { /* by default, new_local_type is seqdense */
4015     new_local_type = MATSEQDENSE;
4016     bs = 1;
4017   }
4018 
4019   /* create MATIS object if needed */
4020   if (reuse == MAT_INITIAL_MATRIX) {
4021     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4022     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4023   } else {
4024     /* it also destroys the local matrices */
4025     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4026   }
4027   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4028   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4029 
4030   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4031 
4032   /* Global to local map of received indices */
4033   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4034   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4035   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4036 
4037   /* restore attributes -> type of incoming data and its size */
4038   buf_size_idxs = 0;
4039   for (i=0;i<n_recvs;i++) {
4040     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4041     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4042     buf_size_idxs += (PetscInt)olengths_idxs[i];
4043   }
4044   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4045 
4046   /* set preallocation */
4047   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4048   if (!newisdense) {
4049     PetscInt *new_local_nnz=0;
4050 
4051     ptr_vals = recv_buffer_vals;
4052     ptr_idxs = recv_buffer_idxs_local;
4053     if (n_recvs) {
4054       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4055     }
4056     for (i=0;i<n_recvs;i++) {
4057       PetscInt j;
4058       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4059         for (j=0;j<*(ptr_idxs+1);j++) {
4060           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4061         }
4062       } else {
4063         /* TODO */
4064       }
4065       ptr_idxs += olengths_idxs[i];
4066     }
4067     if (new_local_nnz) {
4068       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4069       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4070       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4071       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4072       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4073       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4074     } else {
4075       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4076     }
4077     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4078   } else {
4079     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4080   }
4081 
4082   /* set values */
4083   ptr_vals = recv_buffer_vals;
4084   ptr_idxs = recv_buffer_idxs_local;
4085   for (i=0;i<n_recvs;i++) {
4086     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4087       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4088       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4089       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4090       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4091       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4092     } else {
4093       /* TODO */
4094     }
4095     ptr_idxs += olengths_idxs[i];
4096     ptr_vals += olengths_vals[i];
4097   }
4098   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4099   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4100   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4101   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4102   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4103   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4104 
4105 #if 0
4106   if (!restrict_comm) { /* check */
4107     Vec       lvec,rvec;
4108     PetscReal infty_error;
4109 
4110     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4111     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4112     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4113     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4114     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4115     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4116     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4117     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4118     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4119   }
4120 #endif
4121 
4122   /* assemble new additional is (if any) */
4123   if (nis) {
4124     PetscInt **temp_idxs,*count_is,j,psum;
4125 
4126     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4127     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4128     ptr_idxs = recv_buffer_idxs_is;
4129     psum = 0;
4130     for (i=0;i<n_recvs;i++) {
4131       for (j=0;j<nis;j++) {
4132         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4133         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4134         psum += plen;
4135         ptr_idxs += plen+1; /* shift pointer to received data */
4136       }
4137     }
4138     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4139     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4140     for (i=1;i<nis;i++) {
4141       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4142     }
4143     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4144     ptr_idxs = recv_buffer_idxs_is;
4145     for (i=0;i<n_recvs;i++) {
4146       for (j=0;j<nis;j++) {
4147         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4148         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4149         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4150         ptr_idxs += plen+1; /* shift pointer to received data */
4151       }
4152     }
4153     for (i=0;i<nis;i++) {
4154       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4155       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4156       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4157     }
4158     ierr = PetscFree(count_is);CHKERRQ(ierr);
4159     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4160     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4161   }
4162   /* free workspace */
4163   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4164   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4165   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4166   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4167   if (isdense) {
4168     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4169     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4170   } else {
4171     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4172   }
4173   if (nis) {
4174     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4175     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4176   }
4177   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4178   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4179   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4180   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4181   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4182   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4183   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4184   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4185   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4186   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4187   ierr = PetscFree(onodes);CHKERRQ(ierr);
4188   if (nis) {
4189     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4190     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4191     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4192   }
4193   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4194   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4195     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4196     for (i=0;i<nis;i++) {
4197       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4198     }
4199     *mat_n = NULL;
4200   }
4201   PetscFunctionReturn(0);
4202 }
4203 
4204 /* temporary hack into ksp private data structure */
4205 #include <petsc/private/kspimpl.h>
4206 
4207 #undef __FUNCT__
4208 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4209 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4210 {
4211   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4212   PC_IS                  *pcis = (PC_IS*)pc->data;
4213   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4214   MatNullSpace           CoarseNullSpace=NULL;
4215   ISLocalToGlobalMapping coarse_islg;
4216   IS                     coarse_is,*isarray;
4217   PetscInt               i,im_active=-1,active_procs=-1;
4218   PetscInt               nis,nisdofs,nisneu;
4219   PC                     pc_temp;
4220   PCType                 coarse_pc_type;
4221   KSPType                coarse_ksp_type;
4222   PetscBool              multilevel_requested,multilevel_allowed;
4223   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4224   Mat                    t_coarse_mat_is;
4225   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4226   PetscMPIInt            all_procs;
4227   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4228   PetscBool              compute_vecs = PETSC_FALSE;
4229   PetscScalar            *array;
4230   PetscErrorCode         ierr;
4231 
4232   PetscFunctionBegin;
4233   /* Assign global numbering to coarse dofs */
4234   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4235     PetscInt ocoarse_size;
4236     compute_vecs = PETSC_TRUE;
4237     ocoarse_size = pcbddc->coarse_size;
4238     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4239     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4240     /* see if we can avoid some work */
4241     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4242       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4243       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4244         PC        pc;
4245         PetscBool isbddc;
4246 
4247         /* temporary workaround since PCBDDC does not have a reset method so far */
4248         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4249         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4250         if (isbddc) {
4251           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4252         }
4253         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4254         coarse_reuse = PETSC_FALSE;
4255       } else { /* we can safely reuse already computed coarse matrix */
4256         coarse_reuse = PETSC_TRUE;
4257       }
4258     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4259       coarse_reuse = PETSC_FALSE;
4260     }
4261     /* reset any subassembling information */
4262     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4263     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4264   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4265     coarse_reuse = PETSC_TRUE;
4266   }
4267 
4268   /* count "active" (i.e. with positive local size) and "void" processes */
4269   im_active = !!(pcis->n);
4270   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4272   void_procs = all_procs-active_procs;
4273   csin_type_simple = PETSC_TRUE;
4274   redist = PETSC_FALSE;
4275   if (pcbddc->current_level && void_procs) {
4276     csin_ml = PETSC_TRUE;
4277     ncoarse_ml = void_procs;
4278     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4279     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4280       csin_ds = PETSC_TRUE;
4281       ncoarse_ds = pcbddc->redistribute_coarse;
4282       redist = PETSC_TRUE;
4283     } else {
4284       csin_ds = PETSC_TRUE;
4285       ncoarse_ds = active_procs;
4286       redist = PETSC_TRUE;
4287     }
4288   } else {
4289     csin_ml = PETSC_FALSE;
4290     ncoarse_ml = all_procs;
4291     if (void_procs) {
4292       csin_ds = PETSC_TRUE;
4293       ncoarse_ds = void_procs;
4294       csin_type_simple = PETSC_FALSE;
4295     } else {
4296       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4297         csin_ds = PETSC_TRUE;
4298         ncoarse_ds = pcbddc->redistribute_coarse;
4299         redist = PETSC_TRUE;
4300       } else {
4301         csin_ds = PETSC_FALSE;
4302         ncoarse_ds = all_procs;
4303       }
4304     }
4305   }
4306 
4307   /*
4308     test if we can go multilevel: three conditions must be satisfied:
4309     - we have not exceeded the number of levels requested
4310     - we can actually subassemble the active processes
4311     - we can find a suitable number of MPI processes where we can place the subassembled problem
4312   */
4313   multilevel_allowed = PETSC_FALSE;
4314   multilevel_requested = PETSC_FALSE;
4315   if (pcbddc->current_level < pcbddc->max_levels) {
4316     multilevel_requested = PETSC_TRUE;
4317     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4318       multilevel_allowed = PETSC_FALSE;
4319     } else {
4320       multilevel_allowed = PETSC_TRUE;
4321     }
4322   }
4323   /* determine number of process partecipating to coarse solver */
4324   if (multilevel_allowed) {
4325     ncoarse = ncoarse_ml;
4326     csin = csin_ml;
4327     redist = PETSC_FALSE;
4328   } else {
4329     ncoarse = ncoarse_ds;
4330     csin = csin_ds;
4331   }
4332 
4333   /* creates temporary l2gmap and IS for coarse indexes */
4334   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4335   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4336 
4337   /* creates temporary MATIS object for coarse matrix */
4338   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4339   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4340   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4341   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4342 #if 0
4343   {
4344     PetscViewer viewer;
4345     char filename[256];
4346     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4347     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4348     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4349     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4350     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4351   }
4352 #endif
4353   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4354   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4355   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4356   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4358 
4359   /* compute dofs splitting and neumann boundaries for coarse dofs */
4360   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4361     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4362     const PetscInt         *idxs;
4363     ISLocalToGlobalMapping tmap;
4364 
4365     /* create map between primal indices (in local representative ordering) and local primal numbering */
4366     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4367     /* allocate space for temporary storage */
4368     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4369     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4370     /* allocate for IS array */
4371     nisdofs = pcbddc->n_ISForDofsLocal;
4372     nisneu = !!pcbddc->NeumannBoundariesLocal;
4373     nis = nisdofs + nisneu;
4374     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4375     /* dofs splitting */
4376     for (i=0;i<nisdofs;i++) {
4377       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4378       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4379       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4380       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4381       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4382       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4383       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4384       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4385     }
4386     /* neumann boundaries */
4387     if (pcbddc->NeumannBoundariesLocal) {
4388       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4389       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4390       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4391       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4392       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4393       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4394       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4395       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4396     }
4397     /* free memory */
4398     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4399     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4400     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4401   } else {
4402     nis = 0;
4403     nisdofs = 0;
4404     nisneu = 0;
4405     isarray = NULL;
4406   }
4407   /* destroy no longer needed map */
4408   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4409 
4410   /* restrict on coarse candidates (if needed) */
4411   coarse_mat_is = NULL;
4412   if (csin) {
4413     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4414       if (redist) {
4415         PetscMPIInt rank;
4416         PetscInt    spc,n_spc_p1,dest[1],destsize;
4417 
4418         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4419         spc = active_procs/ncoarse;
4420         n_spc_p1 = active_procs%ncoarse;
4421         if (im_active) {
4422           destsize = 1;
4423           if (rank > n_spc_p1*(spc+1)-1) {
4424             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4425           } else {
4426             dest[0] = rank/(spc+1);
4427           }
4428         } else {
4429           destsize = 0;
4430         }
4431         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4432       } else if (csin_type_simple) {
4433         PetscMPIInt rank;
4434         PetscInt    issize,isidx;
4435 
4436         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4437         if (im_active) {
4438           issize = 1;
4439           isidx = (PetscInt)rank;
4440         } else {
4441           issize = 0;
4442           isidx = -1;
4443         }
4444         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4445       } else { /* get a suitable subassembling pattern from MATIS code */
4446         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4447       }
4448 
4449       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4450       if (!redist || ncoarse <= void_procs) {
4451         PetscInt ncoarse_cand,tissize,*nisindices;
4452         PetscInt *coarse_candidates;
4453         const PetscInt* tisindices;
4454 
4455         /* get coarse candidates' ranks in pc communicator */
4456         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4457         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4458         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4459           if (!coarse_candidates[i]) {
4460             coarse_candidates[ncoarse_cand++]=i;
4461           }
4462         }
4463         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4464 
4465 
4466         if (pcbddc->dbg_flag) {
4467           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4468           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4469           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4470           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4471           for (i=0;i<ncoarse_cand;i++) {
4472             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4473           }
4474           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4475           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4476         }
4477         /* shift the pattern on coarse candidates */
4478         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4479         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4480         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4481         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4482         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4483         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4484         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4485       }
4486       if (pcbddc->dbg_flag) {
4487         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4488         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4489         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4490         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4491       }
4492     }
4493     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4494     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4495       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4496     } else { /* this is the last level, so use just receiving processes in subcomm */
4497       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4498     }
4499   } else {
4500     if (pcbddc->dbg_flag) {
4501       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4502       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4503       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4504     }
4505     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4506     coarse_mat_is = t_coarse_mat_is;
4507   }
4508 
4509   /* create local to global scatters for coarse problem */
4510   if (compute_vecs) {
4511     PetscInt lrows;
4512     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4513     if (coarse_mat_is) {
4514       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4515     } else {
4516       lrows = 0;
4517     }
4518     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4519     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4520     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4521     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4522     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4523   }
4524   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4525   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4526 
4527   /* set defaults for coarse KSP and PC */
4528   if (multilevel_allowed) {
4529     coarse_ksp_type = KSPRICHARDSON;
4530     coarse_pc_type = PCBDDC;
4531   } else {
4532     coarse_ksp_type = KSPPREONLY;
4533     coarse_pc_type = PCREDUNDANT;
4534   }
4535 
4536   /* print some info if requested */
4537   if (pcbddc->dbg_flag) {
4538     if (!multilevel_allowed) {
4539       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4540       if (multilevel_requested) {
4541         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4542       } else if (pcbddc->max_levels) {
4543         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4544       }
4545       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4546     }
4547   }
4548 
4549   /* create the coarse KSP object only once with defaults */
4550   if (coarse_mat_is) {
4551     MatReuse coarse_mat_reuse;
4552     PetscViewer dbg_viewer = NULL;
4553     if (pcbddc->dbg_flag) {
4554       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4555       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4556     }
4557     if (!pcbddc->coarse_ksp) {
4558       char prefix[256],str_level[16];
4559       size_t len;
4560       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4561       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4562       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4563       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4564       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4565       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4566       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4567       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4568       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4569       /* prefix */
4570       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4571       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4572       if (!pcbddc->current_level) {
4573         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4574         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4575       } else {
4576         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4577         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4578         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4579         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4580         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4581         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4582       }
4583       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4584       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4585       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4586       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4587       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4588       /* allow user customization */
4589       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4590     }
4591     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4592     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4593     if (nisdofs) {
4594       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4595       for (i=0;i<nisdofs;i++) {
4596         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4597       }
4598     }
4599     if (nisneu) {
4600       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4601       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4602     }
4603 
4604     /* get some info after set from options */
4605     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4606     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4607     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4608     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4609       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4610       isbddc = PETSC_FALSE;
4611     }
4612     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4613     if (isredundant) {
4614       KSP inner_ksp;
4615       PC  inner_pc;
4616       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4617       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4618       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4619     }
4620 
4621     /* assemble coarse matrix */
4622     if (coarse_reuse) {
4623       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4624       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4625       coarse_mat_reuse = MAT_REUSE_MATRIX;
4626     } else {
4627       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4628     }
4629     if (isbddc || isnn) {
4630       if (pcbddc->coarsening_ratio > 1) {
4631         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4632           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4633           if (pcbddc->dbg_flag) {
4634             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4635             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4636             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4637             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4638           }
4639         }
4640         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4641       } else {
4642         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4643         coarse_mat = coarse_mat_is;
4644       }
4645     } else {
4646       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4647     }
4648     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4649 
4650     /* propagate symmetry info of coarse matrix */
4651     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4652     if (pc->pmat->symmetric_set) {
4653       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4654     }
4655     if (pc->pmat->hermitian_set) {
4656       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4657     }
4658     if (pc->pmat->spd_set) {
4659       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4660     }
4661     /* set operators */
4662     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4663     if (pcbddc->dbg_flag) {
4664       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4665     }
4666   } else { /* processes non partecipating to coarse solver (if any) */
4667     coarse_mat = 0;
4668   }
4669   ierr = PetscFree(isarray);CHKERRQ(ierr);
4670 #if 0
4671   {
4672     PetscViewer viewer;
4673     char filename[256];
4674     sprintf(filename,"coarse_mat.m");
4675     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4676     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4677     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4678     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4679   }
4680 #endif
4681 
4682   /* Compute coarse null space (special handling by BDDC only) */
4683 #if 0
4684   if (pcbddc->NullSpace) {
4685     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4686   }
4687 #endif
4688 
4689   if (pcbddc->coarse_ksp) {
4690     Vec crhs,csol;
4691     PetscBool ispreonly;
4692 
4693     if (CoarseNullSpace) {
4694       if (isbddc) {
4695         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4696       } else {
4697         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4698       }
4699     }
4700     /* setup coarse ksp */
4701     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4702     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4703     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4704     /* hack */
4705     if (!csol) {
4706       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4707     }
4708     if (!crhs) {
4709       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4710     }
4711     /* Check coarse problem if in debug mode or if solving with an iterative method */
4712     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4713     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4714       KSP       check_ksp;
4715       KSPType   check_ksp_type;
4716       PC        check_pc;
4717       Vec       check_vec,coarse_vec;
4718       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4719       PetscInt  its;
4720       PetscBool compute_eigs;
4721       PetscReal *eigs_r,*eigs_c;
4722       PetscInt  neigs;
4723       const char *prefix;
4724 
4725       /* Create ksp object suitable for estimation of extreme eigenvalues */
4726       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4727       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4728       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4729       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4730       if (ispreonly) {
4731         check_ksp_type = KSPPREONLY;
4732         compute_eigs = PETSC_FALSE;
4733       } else {
4734         check_ksp_type = KSPGMRES;
4735         compute_eigs = PETSC_TRUE;
4736       }
4737       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4738       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4739       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4740       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4741       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4742       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4743       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4744       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4745       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4746       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4747       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4748       /* create random vec */
4749       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4750       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4751       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4752       if (CoarseNullSpace) {
4753         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4754       }
4755       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4756       /* solve coarse problem */
4757       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4758       if (CoarseNullSpace) {
4759         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4760       }
4761       /* set eigenvalue estimation if preonly has not been requested */
4762       if (compute_eigs) {
4763         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4764         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4765         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4766         lambda_max = eigs_r[neigs-1];
4767         lambda_min = eigs_r[0];
4768         if (pcbddc->use_coarse_estimates) {
4769           if (lambda_max>lambda_min) {
4770             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4771             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4772           }
4773         }
4774       }
4775 
4776       /* check coarse problem residual error */
4777       if (pcbddc->dbg_flag) {
4778         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4779         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4780         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4781         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4782         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4783         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4784         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4785         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4786         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4787         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4788         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4789         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4790         if (compute_eigs) {
4791           PetscReal lambda_max_s,lambda_min_s;
4792           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4793           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4794           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4795           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4796           for (i=0;i<neigs;i++) {
4797             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4798           }
4799         }
4800         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4801         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4802       }
4803       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4804       if (compute_eigs) {
4805         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4806         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4807       }
4808     }
4809   }
4810   /* print additional info */
4811   if (pcbddc->dbg_flag) {
4812     /* waits until all processes reaches this point */
4813     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4814     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4815     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4816   }
4817 
4818   /* free memory */
4819   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4820   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4821   PetscFunctionReturn(0);
4822 }
4823 
4824 #undef __FUNCT__
4825 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4826 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4827 {
4828   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4829   PC_IS*         pcis = (PC_IS*)pc->data;
4830   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4831   IS             subset,subset_mult,subset_n;
4832   PetscInt       local_size,coarse_size=0;
4833   PetscInt       *local_primal_indices=NULL;
4834   const PetscInt *t_local_primal_indices;
4835   PetscErrorCode ierr;
4836 
4837   PetscFunctionBegin;
4838   /* Compute global number of coarse dofs */
4839   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4840     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4841   }
4842   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4843   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4844   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4845   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4846   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4847   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4848   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4849   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4850   if (local_size != pcbddc->local_primal_size) {
4851     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4852   }
4853   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4854   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4855   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4856   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4857   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4858 
4859   /* check numbering */
4860   if (pcbddc->dbg_flag) {
4861     PetscScalar coarsesum,*array,*array2;
4862     PetscInt    i;
4863     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4864 
4865     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4866     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4867     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4868     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4869     /* counter */
4870     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4871     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
4872     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4873     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4874     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4875     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4876 
4877     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4878     for (i=0;i<pcbddc->local_primal_size;i++) {
4879       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4880     }
4881     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4882     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4883     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4884     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4885     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4886     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4887     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4888     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4889     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4890     for (i=0;i<pcis->n;i++) {
4891       if (array[i] != 0.0 && array[i] != array2[i]) {
4892         PetscInt owned = (PetscInt)(array[i]);
4893         PetscInt neigh = (PetscInt)(array2[i]);
4894         set_error = PETSC_TRUE;
4895         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
4896       }
4897     }
4898     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4899     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4900     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4901     for (i=0;i<pcis->n;i++) {
4902       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4903     }
4904     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4905     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4906     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4907     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4908     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4909     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4910     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4911       PetscInt *gidxs;
4912 
4913       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4914       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4915       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4916       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4917       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4918       for (i=0;i<pcbddc->local_primal_size;i++) {
4919         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4920       }
4921       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4922       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4923     }
4924     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4925     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4926   }
4927   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4928   /* get back data */
4929   *coarse_size_n = coarse_size;
4930   *local_primal_indices_n = local_primal_indices;
4931   PetscFunctionReturn(0);
4932 }
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "PCBDDCGlobalToLocal"
4936 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4937 {
4938   IS             localis_t;
4939   PetscInt       i,lsize,*idxs,n;
4940   PetscScalar    *vals;
4941   PetscErrorCode ierr;
4942 
4943   PetscFunctionBegin;
4944   /* get indices in local ordering exploiting local to global map */
4945   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4946   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4947   for (i=0;i<lsize;i++) vals[i] = 1.0;
4948   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4949   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4950   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4951   if (idxs) { /* multilevel guard */
4952     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4953   }
4954   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4955   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4956   ierr = PetscFree(vals);CHKERRQ(ierr);
4957   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4958   /* now compute set in local ordering */
4959   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4960   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4961   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4962   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4963   for (i=0,lsize=0;i<n;i++) {
4964     if (PetscRealPart(vals[i]) > 0.5) {
4965       lsize++;
4966     }
4967   }
4968   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4969   for (i=0,lsize=0;i<n;i++) {
4970     if (PetscRealPart(vals[i]) > 0.5) {
4971       idxs[lsize++] = i;
4972     }
4973   }
4974   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4975   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4976   *localis = localis_t;
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4981 #undef __FUNCT__
4982 #define __FUNCT__ "PCBDDCMatMult_Private"
4983 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4984 {
4985   PCBDDCChange_ctx change_ctx;
4986   PetscErrorCode   ierr;
4987 
4988   PetscFunctionBegin;
4989   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4990   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4991   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4992   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 #undef __FUNCT__
4997 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4998 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4999 {
5000   PCBDDCChange_ctx change_ctx;
5001   PetscErrorCode   ierr;
5002 
5003   PetscFunctionBegin;
5004   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5005   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5006   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5007   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5008   PetscFunctionReturn(0);
5009 }
5010 
5011 #undef __FUNCT__
5012 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5013 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5014 {
5015   PC_IS               *pcis=(PC_IS*)pc->data;
5016   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5017   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5018   Mat                 S_j;
5019   PetscInt            *used_xadj,*used_adjncy;
5020   PetscBool           free_used_adj;
5021   PetscErrorCode      ierr;
5022 
5023   PetscFunctionBegin;
5024   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5025   free_used_adj = PETSC_FALSE;
5026   if (pcbddc->sub_schurs_layers == -1) {
5027     used_xadj = NULL;
5028     used_adjncy = NULL;
5029   } else {
5030     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5031       used_xadj = pcbddc->mat_graph->xadj;
5032       used_adjncy = pcbddc->mat_graph->adjncy;
5033     } else if (pcbddc->computed_rowadj) {
5034       used_xadj = pcbddc->mat_graph->xadj;
5035       used_adjncy = pcbddc->mat_graph->adjncy;
5036     } else {
5037       PetscBool      flg_row=PETSC_FALSE;
5038       const PetscInt *xadj,*adjncy;
5039       PetscInt       nvtxs;
5040 
5041       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5042       if (flg_row) {
5043         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5044         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5045         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5046         free_used_adj = PETSC_TRUE;
5047       } else {
5048         pcbddc->sub_schurs_layers = -1;
5049         used_xadj = NULL;
5050         used_adjncy = NULL;
5051       }
5052       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5053     }
5054   }
5055 
5056   /* setup sub_schurs data */
5057   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5058   if (!sub_schurs->use_mumps) {
5059     /* pcbddc->ksp_D up to date only if not using MUMPS */
5060     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5061     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5062   } else {
5063     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5064     PetscBool isseqaij;
5065     if (!pcbddc->use_vertices && reuse_solvers) {
5066       PetscInt n_vertices;
5067 
5068       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5069       reuse_solvers = (PetscBool)!n_vertices;
5070     }
5071     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5072     if (!isseqaij) {
5073       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5074       if (matis->A == pcbddc->local_mat) {
5075         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5076         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5077       } else {
5078         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5079       }
5080     }
5081     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5082   }
5083   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5084 
5085   /* free adjacency */
5086   if (free_used_adj) {
5087     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5088   }
5089   PetscFunctionReturn(0);
5090 }
5091 
5092 #undef __FUNCT__
5093 #define __FUNCT__ "PCBDDCInitSubSchurs"
5094 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5095 {
5096   PC_IS               *pcis=(PC_IS*)pc->data;
5097   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5098   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5099   PCBDDCGraph         graph;
5100   PetscErrorCode      ierr;
5101 
5102   PetscFunctionBegin;
5103   /* attach interface graph for determining subsets */
5104   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5105     IS       verticesIS,verticescomm;
5106     PetscInt vsize,*idxs;
5107 
5108     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5109     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5110     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5111     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5112     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5113     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5114     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5115     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5116     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5117     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5118     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5119 /*
5120     if (pcbddc->dbg_flag) {
5121       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5122     }
5123 */
5124   } else {
5125     graph = pcbddc->mat_graph;
5126   }
5127 
5128   /* sub_schurs init */
5129   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5130 
5131   /* free graph struct */
5132   if (pcbddc->sub_schurs_rebuild) {
5133     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5134   }
5135   PetscFunctionReturn(0);
5136 }
5137