xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8b9f24d41690183291fcb228f8cef41eae90bef8)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCBenignPopOrPushP0"
10 PetscErrorCode PCBDDCBenignPopOrPushP0(PC pc, Vec v, PetscBool pop)
11 {
12   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
13   PetscErrorCode ierr;
14 
15   PetscFunctionBegin;
16   if (!pcbddc->benign_sf) {
17     ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr);
18     ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,1,NULL,PETSC_OWN_POINTER,&pcbddc->benign_p0_gidx);CHKERRQ(ierr);
19   }
20   if (pop) { /* use SF to get values */
21     PetscScalar *array;
22 
23     ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
24     ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
25     ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,&pcbddc->benign_p0);CHKERRQ(ierr);
26     ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr);
27   } else { /* use VecSetValue */
28     ierr = VecSetValue(v,pcbddc->benign_p0_gidx,pcbddc->benign_p0,INSERT_VALUES);CHKERRQ(ierr);
29     ierr = VecAssemblyBegin(v);CHKERRQ(ierr);
30     ierr = VecAssemblyEnd(v);CHKERRQ(ierr);
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "PCBDDCBenignPopOrPushB0"
37 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
38 {
39   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
40   PetscErrorCode ierr;
41 
42   PetscFunctionBegin;
43   /* TODO: add error checking
44     - avoid nested pop (or push) calls.
45     - cannot push before pop.
46   */
47   if (!pcbddc->zerodiag) {
48     PetscFunctionReturn(0);
49   }
50   if (pop) {
51     const PetscInt    *cB0_cols;
52     PetscInt          cB0_ncol;
53     const PetscScalar *cB0_vals;
54 
55     /* extract B_0 */
56     ierr = MatGetRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
57     pcbddc->B0_ncol = cB0_ncol;
58     ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
59     ierr = PetscMalloc2(cB0_ncol,&pcbddc->B0_cols,cB0_ncol,&pcbddc->B0_vals);CHKERRQ(ierr);
60     ierr = PetscMemcpy(pcbddc->B0_cols,cB0_cols,cB0_ncol*sizeof(PetscInt));CHKERRQ(ierr);
61     ierr = PetscMemcpy(pcbddc->B0_vals,cB0_vals,cB0_ncol*sizeof(PetscScalar));CHKERRQ(ierr);
62     ierr = MatRestoreRow(pcbddc->local_mat,pcbddc->benign_p0_lidx,&cB0_ncol,&cB0_cols,&cB0_vals);CHKERRQ(ierr);
63     /* remove rows and cols from local problem */
64     ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
65     ierr = MatZeroRowsColumns(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,1.,NULL,NULL);CHKERRQ(ierr);
66   } else { /* push */
67     ierr = MatSetValues(pcbddc->local_mat,1,&pcbddc->benign_p0_lidx,pcbddc->B0_ncol,pcbddc->B0_cols,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
68     ierr = MatSetValues(pcbddc->local_mat,pcbddc->B0_ncol,pcbddc->B0_cols,1,&pcbddc->benign_p0_lidx,pcbddc->B0_vals,INSERT_VALUES);CHKERRQ(ierr);
69     ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx,pcbddc->benign_p0_lidx,0.0,INSERT_VALUES);CHKERRQ(ierr);
70     ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
71     ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
72   }
73   PetscFunctionReturn(0);
74 }
75 
76 #undef __FUNCT__
77 #define __FUNCT__ "PCBDDCAdaptiveSelection"
78 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
79 {
80   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
81   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
82   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
83   PetscBLASInt    *B_iwork,*B_ifail;
84   PetscScalar     *work,lwork;
85   PetscScalar     *St,*S,*eigv;
86   PetscScalar     *Sarray,*Starray;
87   PetscReal       *eigs,thresh;
88   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
89   PetscBool       allocated_S_St;
90 #if defined(PETSC_USE_COMPLEX)
91   PetscReal       *rwork;
92 #endif
93   PetscErrorCode  ierr;
94 
95   PetscFunctionBegin;
96   if (!sub_schurs->use_mumps) {
97     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
98   }
99 
100   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
101     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
102   }
103 
104   if (pcbddc->dbg_flag) {
105     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
108     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
109   }
110 
111   if (pcbddc->dbg_flag) {
112     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
113   }
114 
115   /* max size of subsets */
116   mss = 0;
117   for (i=0;i<sub_schurs->n_subs;i++) {
118     PetscInt subset_size;
119 
120     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
121     mss = PetscMax(mss,subset_size);
122   }
123 
124   /* min/max and threshold */
125   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
126   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
127   nmax = PetscMax(nmin,nmax);
128   allocated_S_St = PETSC_FALSE;
129   if (nmin) {
130     allocated_S_St = PETSC_TRUE;
131   }
132 
133   /* allocate lapack workspace */
134   cum = cum2 = 0;
135   maxneigs = 0;
136   for (i=0;i<sub_schurs->n_subs;i++) {
137     PetscInt n,subset_size;
138 
139     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
140     n = PetscMin(subset_size,nmax);
141     cum += subset_size;
142     cum2 += subset_size*n;
143     maxneigs = PetscMax(maxneigs,n);
144   }
145   if (mss) {
146     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
147       PetscBLASInt B_itype = 1;
148       PetscBLASInt B_N = mss;
149       PetscReal    zero = 0.0;
150       PetscReal    eps = 0.0; /* dlamch? */
151 
152       B_lwork = -1;
153       S = NULL;
154       St = NULL;
155       eigs = NULL;
156       eigv = NULL;
157       B_iwork = NULL;
158       B_ifail = NULL;
159 #if defined(PETSC_USE_COMPLEX)
160       rwork = NULL;
161 #endif
162       thresh = 1.0;
163       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
164 #if defined(PETSC_USE_COMPLEX)
165       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
166 #else
167       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
168 #endif
169       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
170       ierr = PetscFPTrapPop();CHKERRQ(ierr);
171     } else {
172         /* TODO */
173     }
174   } else {
175     lwork = 0;
176   }
177 
178   nv = 0;
179   if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
180     ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr);
181   }
182   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
183   if (allocated_S_St) {
184     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
185   }
186   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
187 #if defined(PETSC_USE_COMPLEX)
188   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
189 #endif
190   ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
191                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
192                       nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
193                       nv+cum,&pcbddc->adaptive_constraints_idxs,
194                       nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
195   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
196 
197   maxneigs = 0;
198   cum = cum2 = cumarray = 0;
199   pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
200   pcbddc->adaptive_constraints_data_ptr[0] = 0;
201   if (sub_schurs->is_vertices && pcbddc->use_vertices) {
202     const PetscInt *idxs;
203 
204     ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
205     for (cum=0;cum<nv;cum++) {
206       pcbddc->adaptive_constraints_n[cum] = 1;
207       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
208       pcbddc->adaptive_constraints_data[cum] = 1.0;
209       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
210       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
211     }
212     cum2 = cum;
213     ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr);
214   }
215 
216   if (mss) { /* multilevel */
217     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
218     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
219   }
220 
221   for (i=0;i<sub_schurs->n_subs;i++) {
222 
223     const PetscInt *idxs;
224     PetscReal      infty = PETSC_MAX_REAL;
225     PetscInt       j,subset_size,eigs_start = 0;
226     PetscBLASInt   B_N;
227     PetscBool      same_data = PETSC_FALSE;
228 
229     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
230     ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
231     if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
232       if (sub_schurs->is_hermitian) {
233         PetscInt j,k;
234         if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */
235           ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236           ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237         }
238         for (j=0;j<subset_size;j++) {
239           for (k=j;k<subset_size;k++) {
240             S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
241             St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
242           }
243         }
244       } else {
245         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
246         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
247       }
248     } else {
249       S = Sarray + cumarray;
250       St = Starray + cumarray;
251     }
252 
253     ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
254     /* see if we can save some work */
255     if (sub_schurs->n_subs == 1) {
256       ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr);
257     }
258 
259     if (same_data) { /* there's no need of constraints here, deluxe scaling is enough */
260       B_neigs = 0;
261     } else {
262       /* Threshold: this is an heuristic for edges */
263       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
264 
265       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
266         PetscBLASInt B_itype = 1;
267         PetscBLASInt B_IL, B_IU;
268         PetscReal    eps = -1.0; /* dlamch? */
269         PetscInt     nmin_s;
270 
271         /* ask for eigenvalues larger than thresh */
272         if (pcbddc->dbg_flag) {
273           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
274         }
275         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
276 #if defined(PETSC_USE_COMPLEX)
277         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
278 #else
279         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
280 #endif
281         ierr = PetscFPTrapPop();CHKERRQ(ierr);
282         if (B_ierr) {
283           if (B_ierr < 0 ) {
284             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
285           } else if (B_ierr <= B_N) {
286             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
287           } else {
288             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
289           }
290         }
291 
292         if (B_neigs > nmax) {
293           if (pcbddc->dbg_flag) {
294             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
295           }
296           eigs_start = B_neigs -nmax;
297           B_neigs = nmax;
298         }
299 
300         nmin_s = PetscMin(nmin,B_N);
301         if (B_neigs < nmin_s) {
302           PetscBLASInt B_neigs2;
303 
304           B_IU = B_N - B_neigs;
305           B_IL = B_N - nmin_s + 1;
306           if (pcbddc->dbg_flag) {
307             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
308           }
309           if (sub_schurs->is_hermitian) {
310             PetscInt j;
311             for (j=0;j<subset_size;j++) {
312               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
313             }
314             for (j=0;j<subset_size;j++) {
315               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
316             }
317           } else {
318             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
319             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
320           }
321           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
322 #if defined(PETSC_USE_COMPLEX)
323           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
324 #else
325           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
326 #endif
327           ierr = PetscFPTrapPop();CHKERRQ(ierr);
328           B_neigs += B_neigs2;
329         }
330         if (B_ierr) {
331           if (B_ierr < 0 ) {
332             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
333           } else if (B_ierr <= B_N) {
334             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
335           } else {
336             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
337           }
338         }
339         if (pcbddc->dbg_flag) {
340           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);CHKERRQ(ierr);
341           for (j=0;j<B_neigs;j++) {
342             if (eigs[j] == 0.0) {
343               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");CHKERRQ(ierr);
344             } else {
345               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr);
346             }
347           }
348         }
349       } else {
350           /* TODO */
351       }
352     }
353     maxneigs = PetscMax(B_neigs,maxneigs);
354     pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
355     if (B_neigs) {
356       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358       if (pcbddc->dbg_flag > 1) {
359         PetscInt ii;
360         for (ii=0;ii<B_neigs;ii++) {
361           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr);
362           for (j=0;j<B_N;j++) {
363 #if defined(PETSC_USE_COMPLEX)
364             PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
365             PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
366             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr);
367 #else
368             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr);
369 #endif
370           }
371         }
372       }
373 #if 0
374       for (j=0;j<B_neigs;j++) {
375         PetscBLASInt Blas_N,Blas_one = 1.0;
376         PetscScalar norm;
377         ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
378         PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,
379                                                    &Blas_one,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
380         if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
381           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
382         } else {
383           norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
384         }
385         PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum]+j*subset_size,&Blas_one));
386       }
387 #endif
388       ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
389       pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
390       pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
391       cum++;
392     }
393     ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
394     /* shift for next computation */
395     cumarray += subset_size*subset_size;
396   }
397   if (pcbddc->dbg_flag) {
398     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
399   }
400 
401   if (mss) {
402     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
403     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
404     /* destroy matrices (junk) */
405     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
406     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
407   }
408   if (allocated_S_St) {
409     ierr = PetscFree2(S,St);CHKERRQ(ierr);
410   }
411   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
412 #if defined(PETSC_USE_COMPLEX)
413   ierr = PetscFree(rwork);CHKERRQ(ierr);
414 #endif
415   if (pcbddc->dbg_flag) {
416     PetscInt maxneigs_r;
417     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
418     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
419   }
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "PCBDDCSetUpSolvers"
425 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
426 {
427   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
428   PetscScalar    *coarse_submat_vals;
429   PetscErrorCode ierr;
430 
431   PetscFunctionBegin;
432   /* Setup local scatters R_to_B and (optionally) R_to_D */
433   /* PCBDDCSetUpLocalWorkVectors should be called first! */
434   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
435 
436   /* Setup local neumann solver ksp_R */
437   /* PCBDDCSetUpLocalScatters should be called first! */
438   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
439 
440   /* Change global null space passed in by the user if change of basis has been requested */
441   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
442     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
443   }
444 
445   /*
446      Setup local correction and local part of coarse basis.
447      Gives back the dense local part of the coarse matrix in column major ordering
448   */
449   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
450 
451   /* Compute total number of coarse nodes and setup coarse solver */
452   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
453 
454   /* free */
455   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "PCBDDCResetCustomization"
461 PetscErrorCode PCBDDCResetCustomization(PC pc)
462 {
463   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
464   PetscErrorCode ierr;
465 
466   PetscFunctionBegin;
467   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
468   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
469   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
470   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
471   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
472   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
473   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
474   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
475   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
476   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
477   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "PCBDDCResetTopography"
483 PetscErrorCode PCBDDCResetTopography(PC pc)
484 {
485   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
486   PetscErrorCode ierr;
487 
488   PetscFunctionBegin;
489   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
490   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
491   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
492   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
493   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
494   PetscFunctionReturn(0);
495 }
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "PCBDDCResetSolvers"
499 PetscErrorCode PCBDDCResetSolvers(PC pc)
500 {
501   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
502   PetscScalar    *array;
503   PetscErrorCode ierr;
504 
505   PetscFunctionBegin;
506   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
507   if (pcbddc->coarse_phi_B) {
508     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
509     ierr = PetscFree(array);CHKERRQ(ierr);
510   }
511   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
512   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
513   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
514   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
515   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
516   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
517   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
518   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
519   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
520   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
521   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
522   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
523   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
524   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
525   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
526   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
527   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
528   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
529   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
530   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
531   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
532   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
533   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
534   ierr = ISDestroy(&pcbddc->zerodiag);CHKERRQ(ierr);
535   ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr);
536   ierr = MatDestroy(&pcbddc->benign_original_mat);CHKERRQ(ierr);
537   ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr);
538   ierr = PetscFree2(pcbddc->B0_cols,pcbddc->B0_vals);CHKERRQ(ierr);
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
544 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
545 {
546   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
547   PC_IS          *pcis = (PC_IS*)pc->data;
548   VecType        impVecType;
549   PetscInt       n_constraints,n_R,old_size,n_benign;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   if (!pcbddc->ConstraintMatrix) {
554     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
555   }
556   /* get sizes */
557   n_benign = (PetscInt)(!!pcbddc->zerodiag);
558   n_constraints = pcbddc->local_primal_size - n_benign - pcbddc->n_vertices;
559   if (n_benign && n_constraints) {
560     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot handle benign susbspace trick without change of basis");
561   }
562   n_R = pcis->n - pcbddc->n_vertices;
563   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
564   /* local work vectors (try to avoid unneeded work)*/
565   /* R nodes */
566   old_size = -1;
567   if (pcbddc->vec1_R) {
568     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
569   }
570   if (n_R != old_size) {
571     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
572     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
573     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
574     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
575     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
576     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
577   }
578   /* local primal dofs */
579   old_size = -1;
580   if (pcbddc->vec1_P) {
581     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
582   }
583   if (pcbddc->local_primal_size != old_size) {
584     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
585     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
586     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
587     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
588   }
589   /* local explicit constraints */
590   old_size = -1;
591   if (pcbddc->vec1_C) {
592     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
593   }
594   if (n_constraints && n_constraints != old_size) {
595     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
596     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
597     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
598     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 #undef __FUNCT__
604 #define __FUNCT__ "PCBDDCSetUpCorrection"
605 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
606 {
607   PetscErrorCode  ierr;
608   /* pointers to pcis and pcbddc */
609   PC_IS*          pcis = (PC_IS*)pc->data;
610   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
611   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
612   /* submatrices of local problem */
613   Mat             A_RV,A_VR,A_VV,local_auxmat2_R;
614   /* submatrices of benign trick */
615   Mat             B0_R,B0_V;
616   /* submatrices of local coarse problem */
617   Mat             S_VV,S_CV,S_VC,S_CC;
618   /* working matrices */
619   Mat             C_CR;
620   /* additional working stuff */
621   PC              pc_R;
622   Mat             F;
623   Vec             vec_R_benign;
624   PetscBool       isLU,isCHOL,isILU;
625 
626   PetscScalar     *coarse_submat_vals; /* TODO: use a PETSc matrix */
627   PetscScalar     *work;
628   PetscInt        *idx_V_B;
629   PetscInt        n,n_vertices,n_constraints,n_benign;
630   PetscInt        i,n_R,n_D,n_B;
631   PetscBool       unsymmetric_check;
632   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
633   MatType         impMatType;
634   /* some shortcuts to scalars */
635   PetscScalar     one=1.0,m_one=-1.0;
636 
637   PetscFunctionBegin;
638   n_benign = (PetscInt)(!!pcbddc->zerodiag);
639   n_vertices = pcbddc->n_vertices;
640   n_constraints = pcbddc->local_primal_size - n_benign - n_vertices;
641   /* Set Non-overlapping dimensions */
642   n_B = pcis->n_B;
643   n_D = pcis->n - n_B;
644   n_R = pcis->n - n_vertices;
645 
646   /* Set types for local objects needed by BDDC precondtioner */
647   impMatType = MATSEQDENSE;
648 
649   /* vertices in boundary numbering */
650   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
651   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr);
652   if (i != n_vertices) {
653     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
654   }
655 
656   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
657   ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
658   if (!n_benign) {
659     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
660     ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
661     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
662     ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
663     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
664     ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
665     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
666     ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
667   } else { /* these wrappers are not needed with the benign trick */
668     S_VV = S_CV = S_VC = S_CC = NULL;
669   }
670 
671   unsymmetric_check = PETSC_FALSE;
672   /* allocate workspace */
673   n = 0;
674   if (n_constraints) {
675     n += n_R*n_constraints;
676   }
677   if (n_vertices) {
678     n = PetscMax(2*n_R*n_vertices,n);
679     n = PetscMax((n_R+n_B)*n_vertices,n);
680   }
681   if (!pcbddc->symmetric_primal) {
682     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
683     unsymmetric_check = PETSC_TRUE;
684   }
685   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
686 
687   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
688   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
689   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
690   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
691   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
692   if (isLU || isILU || isCHOL) {
693     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
694   } else if (sub_schurs->reuse_mumps) {
695     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
696     MatFactorType type;
697 
698     F = reuse_mumps->F;
699     ierr = MatGetFactorType(F,&type);CHKERRQ(ierr);
700     if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
701   } else {
702     F = NULL;
703   }
704 
705   /* Precompute stuffs needed for preprocessing and application of BDDC*/
706   if (n_constraints) {
707     Mat         M1,M2,M3;
708     Mat         auxmat;
709     IS          is_aux;
710     PetscScalar *array,*array2;
711 
712     ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
713     ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
714 
715     /* Extract constraints on R nodes: C_{CR}  */
716     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
717     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
718     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&auxmat);CHKERRQ(ierr);
719 
720     /* Assemble         local_auxmat2_R =        (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
721     /* Assemble pcbddc->local_auxmat2   = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
722     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
723     for (i=0;i<n_constraints;i++) {
724       const PetscScalar *row_cmat_values;
725       const PetscInt    *row_cmat_indices;
726       PetscInt          size_of_constraint,j;
727 
728       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
729       for (j=0;j<size_of_constraint;j++) {
730         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
731       }
732       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
733     }
734     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr);
735     if (F) {
736       Mat B;
737 
738       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
739       ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr);
740       ierr = MatDestroy(&B);CHKERRQ(ierr);
741     } else {
742       PetscScalar *marr;
743 
744       ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
745       for (i=0;i<n_constraints;i++) {
746         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
747         ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*n_R);CHKERRQ(ierr);
748         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
749         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
750         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
751       }
752       ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr);
753     }
754     if (!pcbddc->switch_static) {
755       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr);
756       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
757       ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
758       for (i=0;i<n_constraints;i++) {
759         ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*n_R);CHKERRQ(ierr);
760         ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr);
761         ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
762         ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
763         ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
764         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
765       }
766       ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr);
767       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
768       ierr = MatMatMult(auxmat,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
769     } else {
770       ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr);
771       pcbddc->local_auxmat2 = local_auxmat2_R;
772       ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
773     }
774     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
775     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
776     ierr = MatScale(M3,m_one);CHKERRQ(ierr);
777     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
778     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
779     if (isCHOL) {
780       ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr);
781     } else {
782       ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
783     }
784     ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr);
785     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
786     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
787     ierr = MatDestroy(&M2);CHKERRQ(ierr);
788     ierr = MatDestroy(&M3);CHKERRQ(ierr);
789     /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
790     ierr = MatMatMult(M1,auxmat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
791     ierr = MatDestroy(&auxmat);CHKERRQ(ierr);
792     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
793     ierr = MatDestroy(&M1);CHKERRQ(ierr);
794   }
795   /* Get submatrices from subdomain matrix */
796   if (n_vertices) {
797     IS is_aux;
798 
799     if (sub_schurs->reuse_mumps) { /* is_R_local is not sorted, ISComplement doesn't like it */
800       IS tis;
801 
802       ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr);
803       ierr = ISSort(tis);CHKERRQ(ierr);
804       ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr);
805       ierr = ISDestroy(&tis);CHKERRQ(ierr);
806     } else {
807       ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
808     }
809     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
810     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
811     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
812     if (n_benign) {
813       Mat      B0;
814       IS       dummy;
815       PetscInt ii[2];
816 
817       ii[0] = 0;
818       ii[1] = pcbddc->B0_ncol;
819       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,1,pcis->n,ii,pcbddc->B0_cols,pcbddc->B0_vals,&B0);CHKERRQ(ierr);
820       ierr = ISCreateStride(PETSC_COMM_SELF,1,0,1,&dummy);CHKERRQ(ierr);
821       ierr = MatGetSubMatrix(B0,dummy,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&B0_R);CHKERRQ(ierr);
822       ierr = MatGetSubMatrix(B0,dummy,is_aux,MAT_INITIAL_MATRIX,&B0_V);CHKERRQ(ierr);
823       ierr = MatDestroy(&B0);CHKERRQ(ierr);
824       ierr = ISDestroy(&dummy);CHKERRQ(ierr);
825     }
826     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
827   }
828 
829   /* Matrix of coarse basis functions (local) */
830   if (pcbddc->coarse_phi_B) {
831     PetscInt on_B,on_primal,on_D=n_D;
832     if (pcbddc->coarse_phi_D) {
833       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
834     }
835     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
836     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
837       PetscScalar *marray;
838 
839       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
840       ierr = PetscFree(marray);CHKERRQ(ierr);
841       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
842       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
843       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
844       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
845     }
846   }
847 
848   if (!pcbddc->coarse_phi_B) {
849     PetscScalar *marray;
850 
851     n = n_B*pcbddc->local_primal_size;
852     if (pcbddc->switch_static || pcbddc->dbg_flag) {
853       n += n_D*pcbddc->local_primal_size;
854     }
855     if (!pcbddc->symmetric_primal) {
856       n *= 2;
857     }
858     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
859     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
860     n = n_B*pcbddc->local_primal_size;
861     if (pcbddc->switch_static || pcbddc->dbg_flag) {
862       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
863       n += n_D*pcbddc->local_primal_size;
864     }
865     if (!pcbddc->symmetric_primal) {
866       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
867       if (pcbddc->switch_static || pcbddc->dbg_flag) {
868         n = n_B*pcbddc->local_primal_size;
869         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
870       }
871     } else {
872       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
873       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
874       if (pcbddc->switch_static || pcbddc->dbg_flag) {
875         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
876         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
877       }
878     }
879   }
880 
881   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
882   /* vertices */
883   if (n_vertices) {
884 
885     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
886 
887     if (n_R) {
888       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
889       Mat          S_V_benign;
890       PetscBLASInt B_N,B_one = 1;
891       PetscScalar  *x,*y;
892       PetscBool    isseqaij;
893 
894       ierr = MatScale(A_RV,m_one);CHKERRQ(ierr);
895       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
896       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
897       if (F) { /* TODO could be optimized for symmetric problems */
898         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
899       } else {
900         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
901         for (i=0;i<n_vertices;i++) {
902           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
903           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
904           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
905           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
906           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
907         }
908         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
909       }
910       if (n_benign) {
911         const PetscScalar *vals;
912         const PetscInt    *idxs;
913         PetscInt          n;
914 
915         ierr = VecDuplicate(pcbddc->vec1_R,&vec_R_benign);CHKERRQ(ierr);
916         ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
917         ierr = MatGetRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
918         ierr = VecSetValues(pcbddc->vec1_R,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
919         ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
920         ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
921         ierr = MatRestoreRow(B0_R,0,&n,&idxs,&vals);CHKERRQ(ierr);
922         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,vec_R_benign);CHKERRQ(ierr);
923         ierr = VecScale(vec_R_benign,m_one);CHKERRQ(ierr);
924       }
925       ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
926       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
927       if (n_constraints) {
928         Mat B;
929 
930         ierr = PetscMemzero(work+n_R*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
931         for (i=0;i<n_vertices;i++) {
932           ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
933           ierr = VecPlaceArray(pcis->vec1_B,work+n_R*n_vertices+i*n_B);CHKERRQ(ierr);
934           ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935           ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936           ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
937           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
938         }
939         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
940         ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
941         ierr = MatDestroy(&B);CHKERRQ(ierr);
942         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
943         ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
944         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
945         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
946         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
947         ierr = MatDestroy(&B);CHKERRQ(ierr);
948       }
949       ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
950       if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */
951         ierr = MatConvert(A_VR,MATSEQAIJ,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
952       }
953       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
954       if (n_benign) {
955         ierr = MatMatMult(B0_R,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_V_benign);CHKERRQ(ierr);
956       }
957       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
958       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
959       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
960       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
961       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
962       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
963       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
964       if (!n_benign) {
965         ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
966       } else {
967         const PetscScalar *vals;
968         PetscScalar       val;
969         const PetscInt    *idxs;
970         PetscInt          n,j;
971 
972         /* Use explicit loops to populate the local coarse matrix */
973         ierr = MatDenseGetArray(S_VVt,&x);CHKERRQ(ierr);
974         for (j=0;j<n_vertices;j++) {
975           for (i=0;i<n_vertices;i++) {
976             coarse_submat_vals[j*pcbddc->local_primal_size+i] = x[j*n_vertices+i];
977           }
978         }
979         ierr = MatDenseRestoreArray(S_VVt,&x);CHKERRQ(ierr);
980         ierr = MatDenseGetArray(S_V_benign,&x);CHKERRQ(ierr);
981         ierr = MatGetRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
982         for (j=0;j<n_vertices;j++) {
983           coarse_submat_vals[n_vertices*pcbddc->local_primal_size+j] = x[j]+vals[j];
984           coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices] = x[j]+vals[j];
985         }
986         ierr = MatRestoreRow(B0_V,0,&n,&idxs,&vals);CHKERRQ(ierr);
987         ierr = MatDenseRestoreArray(S_V_benign,&x);CHKERRQ(ierr);
988         ierr = MatDestroy(&S_V_benign);CHKERRQ(ierr);
989         ierr = VecDot(vec_R_benign,vec_R_benign,&val);CHKERRQ(ierr);
990         coarse_submat_vals[pcbddc->local_primal_size*pcbddc->local_primal_size-1] = -val;
991       }
992       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
993     } else {
994       /* TODO benign case also here */
995       if (n_benign) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not yet implemented");
996       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
997     }
998     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
999     /* coarse basis functions */
1000     for (i=0;i<n_vertices;i++) {
1001       PetscScalar *y;
1002 
1003       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1004       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1005       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1006       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008       y[n_B*i+idx_V_B[i]] = 1.0;
1009       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1010       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1011 
1012       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1013         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1014         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1015         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1018         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1019       }
1020       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1021     }
1022     if (n_benign) {
1023       PetscScalar *y;
1024 
1025       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1026       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*n_vertices);CHKERRQ(ierr);
1027       ierr = VecScatterBegin(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028       ierr = VecScatterEnd(pcbddc->R_to_B,vec_R_benign,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1030       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1031 
1032       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1033         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1034         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1035         ierr = VecScatterBegin(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036         ierr = VecScatterEnd(pcbddc->R_to_D,vec_R_benign,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1038         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1039       }
1040     }
1041     /* if n_R == 0 the object is not destroyed */
1042     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
1043   }
1044 
1045   if (n_constraints) {
1046     Mat B;
1047 
1048     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
1049     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1050     ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
1051     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
1052     if (n_vertices) {
1053       if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
1054         ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr);
1055       } else {
1056         Mat S_VCt;
1057 
1058         ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr);
1059         ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1060         ierr = MatDestroy(&S_VCt);CHKERRQ(ierr);
1061       }
1062     }
1063     ierr = MatDestroy(&B);CHKERRQ(ierr);
1064     /* coarse basis functions */
1065     for (i=0;i<n_constraints;i++) {
1066       PetscScalar *y;
1067 
1068       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
1069       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1070       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
1071       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1073       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
1074       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1075       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1076         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1077         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
1078         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1079         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1081         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
1082       }
1083       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1084     }
1085   }
1086   if (n_constraints) {
1087     ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr);
1088   }
1089 
1090   if (n_benign) {
1091     ierr = MatDestroy(&B0_R);CHKERRQ(ierr);
1092     ierr = MatDestroy(&B0_V);CHKERRQ(ierr);
1093     ierr = VecDestroy(&vec_R_benign);CHKERRQ(ierr);
1094   }
1095 
1096   /* compute other basis functions for non-symmetric problems */
1097   if (!pcbddc->symmetric_primal) {
1098 
1099     if (n_constraints) {
1100       Mat S_CCT,B_C;
1101 
1102       /* this is a lazy thing */
1103       ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
1104       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
1105       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
1106       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
1107       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
1108       if (n_vertices) {
1109         Mat B_V,S_VCT;
1110 
1111         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
1112         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
1113         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
1114         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
1115         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
1116       }
1117       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
1118     } else { /* if there are no constraints, reset work */
1119       ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
1120     }
1121     if (n_vertices && n_R) {
1122       Mat          A_VRT;
1123       PetscScalar  *marray;
1124       PetscBLASInt B_N,B_one = 1;
1125 
1126       ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_VRT);CHKERRQ(ierr);
1127       ierr = MatConvert(A_VRT,impMatType,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
1128       ierr = MatDenseGetArray(A_VRT,&marray);CHKERRQ(ierr);
1129       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
1130       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,marray,&B_one,work,&B_one));
1131       ierr = MatDenseRestoreArray(A_VRT,&marray);CHKERRQ(ierr);
1132       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
1133     }
1134 
1135     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
1136       for (i=0;i<pcbddc->local_primal_size;i++) {
1137         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1138         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1139         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1140         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1141         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1142       }
1143     } else {
1144       for (i=0;i<pcbddc->local_primal_size;i++) {
1145         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
1146         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
1147         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1148         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1149         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
1150       }
1151     }
1152     /* coarse basis functions */
1153     for (i=0;i<pcbddc->local_primal_size;i++) {
1154       PetscScalar *y;
1155 
1156       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
1157       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1158       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
1159       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161       if (i<n_vertices) {
1162         y[n_B*i+idx_V_B[i]] = 1.0;
1163       }
1164       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
1165       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
1166 
1167       if (pcbddc->switch_static || pcbddc->dbg_flag) {
1168         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1169         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
1170         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1172         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
1173         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
1174       }
1175       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
1176     }
1177   }
1178   /* free memory */
1179   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
1180   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
1181   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
1182   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
1183   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
1184   ierr = PetscFree(work);CHKERRQ(ierr);
1185   if (n_vertices) {
1186     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1187   }
1188   if (n_constraints) {
1189     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1190   }
1191   /* Checking coarse_sub_mat and coarse basis functios */
1192   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1193   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
1194   //if (pcbddc->dbg_flag) {
1195   if (0) {
1196     Mat         coarse_sub_mat;
1197     Mat         AUXMAT,TM1,TM2,TM3,TM4;
1198     Mat         coarse_phi_D,coarse_phi_B;
1199     Mat         coarse_psi_D,coarse_psi_B;
1200     Mat         A_II,A_BB,A_IB,A_BI;
1201     Mat         C_B,CPHI;
1202     IS          is_dummy;
1203     Vec         mones;
1204     MatType     checkmattype=MATSEQAIJ;
1205     PetscReal   real_value;
1206 
1207     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
1208     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
1209     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
1210     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
1211     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
1212     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
1213     if (unsymmetric_check) {
1214       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
1215       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
1216     }
1217     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
1218 
1219     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1220     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr);
1221     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1222     if (unsymmetric_check) {
1223       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1224       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1225       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1226       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1227       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1228       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1229       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1230       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1231       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1232       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1233       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1234       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1235     } else {
1236       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1237       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1238       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1239       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1240       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1241       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1242       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1243       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1244     }
1245     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1246     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1247     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1248     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1249     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1250     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1251     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1252     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d          matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1253 
1254     /* check constraints */
1255     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1256     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1257     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1258     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1259     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1260     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1261     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1262     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1263     if (unsymmetric_check) {
1264       ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1265       ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1266       ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1267       ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1268       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1269     }
1270     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1271     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1272     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1273     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1274 
1275     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1276     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1277     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1278     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1279     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1280     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1281     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1282     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1283     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1284     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1285     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1286     if (unsymmetric_check) {
1287       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1288       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1289     }
1290     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1291   }
1292   /* get back data */
1293   *coarse_submat_vals_n = coarse_submat_vals;
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1299 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1300 {
1301   Mat            *work_mat;
1302   IS             isrow_s,iscol_s;
1303   PetscBool      rsorted,csorted;
1304   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1305   PetscErrorCode ierr;
1306 
1307   PetscFunctionBegin;
1308   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1309   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1310   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1311   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1312 
1313   if (!rsorted) {
1314     const PetscInt *idxs;
1315     PetscInt *idxs_sorted,i;
1316 
1317     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1318     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1319     for (i=0;i<rsize;i++) {
1320       idxs_perm_r[i] = i;
1321     }
1322     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1323     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1324     for (i=0;i<rsize;i++) {
1325       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1326     }
1327     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1328     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1329   } else {
1330     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1331     isrow_s = isrow;
1332   }
1333 
1334   if (!csorted) {
1335     if (isrow == iscol) {
1336       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1337       iscol_s = isrow_s;
1338     } else {
1339       const PetscInt *idxs;
1340       PetscInt *idxs_sorted,i;
1341 
1342       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1343       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1344       for (i=0;i<csize;i++) {
1345         idxs_perm_c[i] = i;
1346       }
1347       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1348       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1349       for (i=0;i<csize;i++) {
1350         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1351       }
1352       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1353       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1354     }
1355   } else {
1356     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1357     iscol_s = iscol;
1358   }
1359 
1360   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1361 
1362   if (!rsorted || !csorted) {
1363     Mat      new_mat;
1364     IS       is_perm_r,is_perm_c;
1365 
1366     if (!rsorted) {
1367       PetscInt *idxs_r,i;
1368       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1369       for (i=0;i<rsize;i++) {
1370         idxs_r[idxs_perm_r[i]] = i;
1371       }
1372       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1373       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1374     } else {
1375       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1376     }
1377     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1378 
1379     if (!csorted) {
1380       if (isrow_s == iscol_s) {
1381         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1382         is_perm_c = is_perm_r;
1383       } else {
1384         PetscInt *idxs_c,i;
1385         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1386         for (i=0;i<csize;i++) {
1387           idxs_c[idxs_perm_c[i]] = i;
1388         }
1389         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1390         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1391       }
1392     } else {
1393       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1394     }
1395     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1396 
1397     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1398     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1399     work_mat[0] = new_mat;
1400     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1401     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1402   }
1403 
1404   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1405   *B = work_mat[0];
1406   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1407   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1408   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1414 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1415 {
1416   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1417   PC_IS*         pcis = (PC_IS*)pc->data;
1418   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1419   Mat            new_mat;
1420   IS             is_local,is_global;
1421   PetscInt       local_size;
1422   PetscBool      isseqaij;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1427   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1428   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1429   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,is_local,&is_global);CHKERRQ(ierr);
1430   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1431   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1432   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1433 
1434   /* check */
1435   if (pcbddc->dbg_flag) {
1436     Vec       x,x_change;
1437     PetscReal error;
1438 
1439     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1440     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1441     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1442     ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443     ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1444     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1445     ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1446     ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1447     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1448     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1449     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1450     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1451     ierr = VecDestroy(&x);CHKERRQ(ierr);
1452     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1453   }
1454 
1455   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1456   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1457   if (isseqaij) {
1458     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1459   } else {
1460     Mat work_mat;
1461     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1462     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1463     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1464   }
1465   if (matis->A->symmetric_set) {
1466     ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr);
1467 #if !defined(PETSC_USE_COMPLEX)
1468     ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr);
1469 #endif
1470   }
1471   /*
1472   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1473   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1474   */
1475   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1481 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1482 {
1483   PC_IS*          pcis = (PC_IS*)(pc->data);
1484   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
1485   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1486   PetscInt        *idx_R_local=NULL;
1487   PetscInt        n_vertices,i,j,n_R,n_D,n_B;
1488   PetscInt        vbs,bs;
1489   PetscBT         bitmask=NULL;
1490   PetscErrorCode  ierr;
1491 
1492   PetscFunctionBegin;
1493   /*
1494     No need to setup local scatters if
1495       - primal space is unchanged
1496         AND
1497       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1498         AND
1499       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1500   */
1501   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1502     PetscFunctionReturn(0);
1503   }
1504   /* destroy old objects */
1505   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1506   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1507   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1508   /* Set Non-overlapping dimensions */
1509   n_B = pcis->n_B;
1510   n_D = pcis->n - n_B;
1511   n_vertices = pcbddc->n_vertices;
1512 
1513   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1514 
1515   /* create auxiliary bitmask and allocate workspace */
1516   if (!sub_schurs->reuse_mumps) {
1517     ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1518     ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1519     for (i=0;i<n_vertices;i++) {
1520       ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr);
1521     }
1522 
1523     for (i=0, n_R=0; i<pcis->n; i++) {
1524       if (!PetscBTLookup(bitmask,i)) {
1525         idx_R_local[n_R++] = i;
1526       }
1527     }
1528   } else { /* A different ordering (already computed) is present if we are reusing MUMPS Schur solver */
1529     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1530 
1531     ierr = ISGetIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1532     ierr = ISGetLocalSize(reuse_mumps->is_R,&n_R);CHKERRQ(ierr);
1533   }
1534 
1535   /* Block code */
1536   vbs = 1;
1537   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1538   if (bs>1 && !(n_vertices%bs)) {
1539     PetscBool is_blocked = PETSC_TRUE;
1540     PetscInt  *vary;
1541     if (!sub_schurs->reuse_mumps) {
1542       ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1543       ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1544       /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1545       /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
1546       for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
1547       for (i=0; i<pcis->n/bs; i++) {
1548         if (vary[i]!=0 && vary[i]!=bs) {
1549           is_blocked = PETSC_FALSE;
1550           break;
1551         }
1552       }
1553       ierr = PetscFree(vary);CHKERRQ(ierr);
1554     } else {
1555       /* Verify directly the R set */
1556       for (i=0; i<n_R/bs; i++) {
1557         PetscInt j,node=idx_R_local[bs*i];
1558         for (j=1; j<bs; j++) {
1559           if (node != idx_R_local[bs*i+j]-j) {
1560             is_blocked = PETSC_FALSE;
1561             break;
1562           }
1563         }
1564       }
1565     }
1566     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1567       vbs = bs;
1568       for (i=0;i<n_R/vbs;i++) {
1569         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1570       }
1571     }
1572   }
1573   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1574   if (sub_schurs->reuse_mumps) {
1575     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1576 
1577     ierr = ISRestoreIndices(reuse_mumps->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1578     ierr = ISDestroy(&reuse_mumps->is_R);CHKERRQ(ierr);
1579     ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr);
1580     reuse_mumps->is_R = pcbddc->is_R_local;
1581   } else {
1582     ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1583   }
1584 
1585   /* print some info if requested */
1586   if (pcbddc->dbg_flag) {
1587     PetscInt benign;
1588 
1589     benign = (PetscInt)(!!pcbddc->zerodiag);
1590     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1591     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1592     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1593     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1594     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1595     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-benign,pcbddc->local_primal_size);CHKERRQ(ierr);
1596     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1597   }
1598 
1599   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1600   if (!sub_schurs->reuse_mumps) {
1601     IS       is_aux1,is_aux2;
1602     PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1603 
1604     ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1605     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1606     ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1607     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1608     for (i=0; i<n_D; i++) {
1609       ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1610     }
1611     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1612     for (i=0, j=0; i<n_R; i++) {
1613       if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1614         aux_array1[j++] = i;
1615       }
1616     }
1617     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1618     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1619     for (i=0, j=0; i<n_B; i++) {
1620       if (!PetscBTLookup(bitmask,is_indices[i])) {
1621         aux_array2[j++] = i;
1622       }
1623     }
1624     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1625     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1626     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1627     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1628     ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1629 
1630     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1631       ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1632       for (i=0, j=0; i<n_R; i++) {
1633         if (PetscBTLookup(bitmask,idx_R_local[i])) {
1634           aux_array1[j++] = i;
1635         }
1636       }
1637       ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1638       ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1639       ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1640     }
1641     ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1642     ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1643   } else {
1644     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1645     IS               tis;
1646     PetscInt         schur_size;
1647 
1648     ierr = ISGetLocalSize(reuse_mumps->is_B,&schur_size);CHKERRQ(ierr);
1649     ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr);
1650     ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_mumps->is_B,&pcbddc->R_to_B);CHKERRQ(ierr);
1651     ierr = ISDestroy(&tis);CHKERRQ(ierr);
1652     if (pcbddc->switch_static || pcbddc->dbg_flag) {
1653       ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr);
1654       ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1655       ierr = ISDestroy(&tis);CHKERRQ(ierr);
1656     }
1657   }
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1664 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1665 {
1666   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1667   PC_IS          *pcis = (PC_IS*)pc->data;
1668   PC             pc_temp;
1669   Mat            A_RR;
1670   MatReuse       reuse;
1671   PetscScalar    m_one = -1.0;
1672   PetscReal      value;
1673   PetscInt       n_D,n_R;
1674   PetscBool      use_exact,use_exact_reduced,issbaij;
1675   PetscErrorCode ierr;
1676   /* prefixes stuff */
1677   char           dir_prefix[256],neu_prefix[256],str_level[16];
1678   size_t         len;
1679 
1680   PetscFunctionBegin;
1681 
1682   /* compute prefixes */
1683   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1684   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1685   if (!pcbddc->current_level) {
1686     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1687     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1688     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1689     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1690   } else {
1691     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1692     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1693     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1694     len -= 15; /* remove "pc_bddc_coarse_" */
1695     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1696     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1697     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1698     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1699     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1700     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1701     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1702     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1703   }
1704 
1705   /* DIRICHLET PROBLEM */
1706   if (dirichlet) {
1707     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1708     if (pcbddc->local_mat->symmetric_set) {
1709       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1710     }
1711     /* Matrix for Dirichlet problem is pcis->A_II */
1712     n_D = pcis->n - pcis->n_B;
1713     if (!pcbddc->ksp_D) { /* create object if not yet build */
1714       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1715       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1716       /* default */
1717       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1718       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1719       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1720       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1721       if (issbaij) {
1722         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1723       } else {
1724         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1725       }
1726       /* Allow user's customization */
1727       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1728       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1729     }
1730     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1731     if (sub_schurs->reuse_mumps) {
1732       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1733 
1734       ierr = KSPSetPC(pcbddc->ksp_D,reuse_mumps->interior_solver);CHKERRQ(ierr);
1735     }
1736     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1737     if (!n_D) {
1738       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1739       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1740     }
1741     /* Set Up KSP for Dirichlet problem of BDDC */
1742     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1743     /* set ksp_D into pcis data */
1744     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1745     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1746     pcis->ksp_D = pcbddc->ksp_D;
1747   }
1748 
1749   /* NEUMANN PROBLEM */
1750   A_RR = 0;
1751   if (neumann) {
1752     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1753     PetscInt        ibs,mbs;
1754     PetscBool       issbaij;
1755     Mat_IS*         matis = (Mat_IS*)pc->pmat->data;
1756     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1757     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1758     if (pcbddc->ksp_R) { /* already created ksp */
1759       PetscInt nn_R;
1760       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1761       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1762       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1763       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1764         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1765         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1766         reuse = MAT_INITIAL_MATRIX;
1767       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1768         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1769           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1770           reuse = MAT_INITIAL_MATRIX;
1771         } else { /* safe to reuse the matrix */
1772           reuse = MAT_REUSE_MATRIX;
1773         }
1774       }
1775       /* last check */
1776       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1777         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1778         reuse = MAT_INITIAL_MATRIX;
1779       }
1780     } else { /* first time, so we need to create the matrix */
1781       reuse = MAT_INITIAL_MATRIX;
1782     }
1783     /* extract A_RR */
1784     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1785     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1786     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1787     if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
1788       if (matis->A == pcbddc->local_mat) {
1789         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1790         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1791       } else {
1792         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1793       }
1794     } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
1795       if (matis->A == pcbddc->local_mat) {
1796         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1797         ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1798       } else {
1799         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
1800       }
1801     }
1802     if (!sub_schurs->reuse_mumps) {
1803       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1804       if (pcbddc->local_mat->symmetric_set) {
1805         ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr);
1806       }
1807     } else {
1808       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1809 
1810       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1811       ierr = PCGetOperators(reuse_mumps->correction_solver,&A_RR,NULL);CHKERRQ(ierr);
1812       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1813     }
1814     if (!pcbddc->ksp_R) { /* create object if not present */
1815       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1816       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1817       /* default */
1818       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1819       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1820       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1821       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1822       if (issbaij) {
1823         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1824       } else {
1825         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1826       }
1827       /* Allow user's customization */
1828       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1829       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1830     }
1831     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1832     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1833     if (!n_R) {
1834       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1835       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1836     }
1837     /* Reuse MUMPS solver if it is present */
1838     if (sub_schurs->reuse_mumps) {
1839       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1840 
1841       ierr = KSPSetPC(pcbddc->ksp_R,reuse_mumps->correction_solver);CHKERRQ(ierr);
1842     }
1843     /* Set Up KSP for Neumann problem of BDDC */
1844     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1845   }
1846   /* free Neumann problem's matrix */
1847   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1848 
1849   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1850   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1851     if (pcbddc->dbg_flag) {
1852       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1853       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1854       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1855     }
1856     if (dirichlet) { /* Dirichlet */
1857       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1858       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1859       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1860       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1861       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1862       /* need to be adapted? */
1863       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1864       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1865       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1866       /* print info */
1867       if (pcbddc->dbg_flag) {
1868         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1869         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1870       }
1871       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1872         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcis->is_I_local);CHKERRQ(ierr);
1873       }
1874     }
1875     if (neumann) { /* Neumann */
1876       ierr = KSPGetOperators(pcbddc->ksp_R,&A_RR,NULL);CHKERRQ(ierr);
1877       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1878       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1879       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1880       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1881       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1882       /* need to be adapted? */
1883       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1884       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1885       /* print info */
1886       if (pcbddc->dbg_flag) {
1887         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1888         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1889       }
1890       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1891         ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->is_R_local);CHKERRQ(ierr);
1892       }
1893     }
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1900 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
1901 {
1902   PetscErrorCode  ierr;
1903   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1904   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1905 
1906   PetscFunctionBegin;
1907   if (!sub_schurs->reuse_mumps) {
1908     ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr);
1909   }
1910   if (!pcbddc->switch_static) {
1911     if (applytranspose && pcbddc->local_auxmat1) {
1912       ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1913       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1914     }
1915     if (!sub_schurs->reuse_mumps) {
1916       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1917       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1918     } else {
1919       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1920 
1921       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1922       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,inout_B,reuse_mumps->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1923     }
1924   } else {
1925     ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1926     ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1927     ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1928     ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1929     if (applytranspose && pcbddc->local_auxmat1) {
1930       ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr);
1931       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1932       ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1933       ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1934     }
1935   }
1936   if (!sub_schurs->reuse_mumps) {
1937     if (applytranspose) {
1938       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1939     } else {
1940       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1941     }
1942 #if defined(PETSC_HAVE_MUMPS)
1943   } else {
1944     PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1945 
1946     if (applytranspose) {
1947       ierr = MatMumpsSolveSchurComplementTranspose(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1948     } else {
1949       ierr = MatMumpsSolveSchurComplement(reuse_mumps->F,reuse_mumps->rhs_B,reuse_mumps->sol_B);CHKERRQ(ierr);
1950     }
1951 #endif
1952   }
1953   ierr = VecSet(inout_B,0.);CHKERRQ(ierr);
1954   if (!pcbddc->switch_static) {
1955     if (!sub_schurs->reuse_mumps) {
1956       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1957       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1958     } else {
1959       PCBDDCReuseMumps reuse_mumps = sub_schurs->reuse_mumps;
1960 
1961       ierr = VecScatterBegin(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1962       ierr = VecScatterEnd(reuse_mumps->correction_scatter_B,reuse_mumps->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1963     }
1964     if (!applytranspose && pcbddc->local_auxmat1) {
1965       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1966       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr);
1967     }
1968   } else {
1969     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1970     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1971     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1972     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1973     if (!applytranspose && pcbddc->local_auxmat1) {
1974       ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr);
1975       ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
1976     }
1977     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1978     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1979     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1980     ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1986 #undef __FUNCT__
1987 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1988 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1989 {
1990   PetscErrorCode ierr;
1991   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1992   PC_IS*            pcis = (PC_IS*)  (pc->data);
1993   const PetscScalar zero = 0.0;
1994 
1995   PetscFunctionBegin;
1996   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1997   if (applytranspose) {
1998     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1999     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2000   } else {
2001     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
2002     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
2003   }
2004 
2005   /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
2006   if (pcbddc->benign_change) {
2007     PetscScalar *array;
2008 
2009     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2010     array[pcbddc->local_primal_size-1] += pcbddc->benign_p0;
2011     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2012   }
2013 
2014   /* start communications from local primal nodes to rhs of coarse solver */
2015   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
2016   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2017   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2018 
2019   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
2020   /* TODO remove null space when doing multilevel */
2021   if (pcbddc->coarse_ksp) {
2022     Vec rhs,sol;
2023 
2024     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
2025     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
2026     if (applytranspose) {
2027       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2028     } else {
2029       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
2030     }
2031   }
2032 
2033   /* Local solution on R nodes */
2034   if (pcis->n) { /* in/out pcbddc->vec1_B,pcbddc->vec1_D */
2035     ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr);
2036   }
2037 
2038   /* communications from coarse sol to local primal nodes */
2039   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2040   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2041 
2042   /* Sum contributions from two levels */
2043   if (applytranspose) {
2044     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2045     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2046   } else {
2047     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
2048     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
2049   }
2050   /* store p0 */
2051   if (pcbddc->benign_change) {
2052     PetscScalar *array;
2053 
2054     ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2055     pcbddc->benign_p0 = array[pcbddc->local_primal_size-1];
2056     ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr);
2057   }
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
2063 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
2064 {
2065   PetscErrorCode ierr;
2066   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2067   PetscScalar    *array;
2068   Vec            from,to;
2069 
2070   PetscFunctionBegin;
2071   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2072     from = pcbddc->coarse_vec;
2073     to = pcbddc->vec1_P;
2074     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2075       Vec tvec;
2076 
2077       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2078       ierr = VecResetArray(tvec);CHKERRQ(ierr);
2079       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2080       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
2081       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
2082       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
2083     }
2084   } else { /* from local to global -> put data in coarse right hand side */
2085     from = pcbddc->vec1_P;
2086     to = pcbddc->coarse_vec;
2087   }
2088   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
2094 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
2095 {
2096   PetscErrorCode ierr;
2097   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
2098   PetscScalar    *array;
2099   Vec            from,to;
2100 
2101   PetscFunctionBegin;
2102   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
2103     from = pcbddc->coarse_vec;
2104     to = pcbddc->vec1_P;
2105   } else { /* from local to global -> put data in coarse right hand side */
2106     from = pcbddc->vec1_P;
2107     to = pcbddc->coarse_vec;
2108   }
2109   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
2110   if (smode == SCATTER_FORWARD) {
2111     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
2112       Vec tvec;
2113 
2114       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
2115       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
2116       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
2117       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
2118     }
2119   } else {
2120     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
2121      ierr = VecResetArray(from);CHKERRQ(ierr);
2122     }
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 /* uncomment for testing purposes */
2128 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
2129 #undef __FUNCT__
2130 #define __FUNCT__ "PCBDDCConstraintsSetUp"
2131 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
2132 {
2133   PetscErrorCode    ierr;
2134   PC_IS*            pcis = (PC_IS*)(pc->data);
2135   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
2136   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
2137   /* one and zero */
2138   PetscScalar       one=1.0,zero=0.0;
2139   /* space to store constraints and their local indices */
2140   PetscScalar       *constraints_data;
2141   PetscInt          *constraints_idxs,*constraints_idxs_B;
2142   PetscInt          *constraints_idxs_ptr,*constraints_data_ptr;
2143   PetscInt          *constraints_n;
2144   /* iterators */
2145   PetscInt          i,j,k,total_counts,total_counts_cc,cum;
2146   /* BLAS integers */
2147   PetscBLASInt      lwork,lierr;
2148   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
2149   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
2150   /* reuse */
2151   PetscInt          olocal_primal_size,olocal_primal_size_cc;
2152   PetscInt          *olocal_primal_ref_node,*olocal_primal_ref_mult;
2153   /* change of basis */
2154   PetscBool         qr_needed;
2155   PetscBT           change_basis,qr_needed_idx;
2156   /* auxiliary stuff */
2157   PetscInt          *nnz,*is_indices;
2158   PetscInt          ncc;
2159   /* some quantities */
2160   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
2161   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
2162 
2163   PetscFunctionBegin;
2164   /* Destroy Mat objects computed previously */
2165   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2166   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2167   /* save info on constraints from previous setup (if any) */
2168   olocal_primal_size = pcbddc->local_primal_size;
2169   olocal_primal_size_cc = pcbddc->local_primal_size_cc;
2170   ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr);
2171   ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2172   ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr);
2173   ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2174   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2175 
2176   /* print some info */
2177   if (pcbddc->dbg_flag) {
2178     IS       vertices;
2179     PetscInt nv,nedges,nfaces;
2180     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
2181     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
2182     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
2183     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
2184     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2185     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
2186     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
2187     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
2188     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2189   }
2190 
2191   if (!pcbddc->adaptive_selection) {
2192     IS           ISForVertices,*ISForFaces,*ISForEdges;
2193     MatNullSpace nearnullsp;
2194     const Vec    *nearnullvecs;
2195     Vec          *localnearnullsp;
2196     PetscScalar  *array;
2197     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
2198     PetscBool    nnsp_has_cnst;
2199     /* LAPACK working arrays for SVD or POD */
2200     PetscBool    skip_lapack,boolforchange;
2201     PetscScalar  *work;
2202     PetscReal    *singular_vals;
2203 #if defined(PETSC_USE_COMPLEX)
2204     PetscReal    *rwork;
2205 #endif
2206 #if defined(PETSC_MISSING_LAPACK_GESVD)
2207     PetscScalar  *temp_basis,*correlation_mat;
2208 #else
2209     PetscBLASInt dummy_int=1;
2210     PetscScalar  dummy_scalar=1.;
2211 #endif
2212 
2213     /* Get index sets for faces, edges and vertices from graph */
2214     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
2215     /* free unneeded index sets */
2216     if (!pcbddc->use_vertices) {
2217       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2218     }
2219     if (!pcbddc->use_edges) {
2220       for (i=0;i<n_ISForEdges;i++) {
2221         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2222       }
2223       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2224       n_ISForEdges = 0;
2225     }
2226     if (!pcbddc->use_faces) {
2227       for (i=0;i<n_ISForFaces;i++) {
2228         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2229       }
2230       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2231       n_ISForFaces = 0;
2232     }
2233 
2234 #if defined(PETSC_USE_DEBUG)
2235     /* HACK: when solving singular problems not using vertices, a change of basis is mandatory.
2236        Also use_change_of_basis should be consistent among processors */
2237     if (pcbddc->NullSpace) {
2238       PetscBool tbool[2],gbool[2];
2239 
2240       if (!ISForVertices && !pcbddc->user_ChangeOfBasisMatrix) {
2241         pcbddc->use_change_of_basis = PETSC_TRUE;
2242         if (!ISForEdges) {
2243           pcbddc->use_change_on_faces = PETSC_TRUE;
2244         }
2245       }
2246       tbool[0] = pcbddc->use_change_of_basis;
2247       tbool[1] = pcbddc->use_change_on_faces;
2248       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2249       pcbddc->use_change_of_basis = gbool[0];
2250       pcbddc->use_change_on_faces = gbool[1];
2251     }
2252 #endif
2253 
2254     /* check if near null space is attached to global mat */
2255     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
2256     if (nearnullsp) {
2257       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
2258       /* remove any stored info */
2259       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
2260       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2261       /* store information for BDDC solver reuse */
2262       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
2263       pcbddc->onearnullspace = nearnullsp;
2264       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
2265       for (i=0;i<nnsp_size;i++) {
2266         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
2267       }
2268     } else { /* if near null space is not provided BDDC uses constants by default */
2269       nnsp_size = 0;
2270       nnsp_has_cnst = PETSC_TRUE;
2271     }
2272     /* get max number of constraints on a single cc */
2273     max_constraints = nnsp_size;
2274     if (nnsp_has_cnst) max_constraints++;
2275 
2276     /*
2277          Evaluate maximum storage size needed by the procedure
2278          - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
2279          - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
2280          There can be multiple constraints per connected component
2281                                                                                                                                                            */
2282     n_vertices = 0;
2283     if (ISForVertices) {
2284       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
2285     }
2286     ncc = n_vertices+n_ISForFaces+n_ISForEdges;
2287     ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr);
2288 
2289     total_counts = n_ISForFaces+n_ISForEdges;
2290     total_counts *= max_constraints;
2291     total_counts += n_vertices;
2292     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2293 
2294     total_counts = 0;
2295     max_size_of_constraint = 0;
2296     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
2297       IS used_is;
2298       if (i<n_ISForEdges) {
2299         used_is = ISForEdges[i];
2300       } else {
2301         used_is = ISForFaces[i-n_ISForEdges];
2302       }
2303       ierr = ISGetSize(used_is,&j);CHKERRQ(ierr);
2304       total_counts += j;
2305       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
2306     }
2307     ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr);
2308 
2309     /* get local part of global near null space vectors */
2310     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
2311     for (k=0;k<nnsp_size;k++) {
2312       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
2313       ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2314       ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2315     }
2316 
2317     /* whether or not to skip lapack calls */
2318     skip_lapack = PETSC_TRUE;
2319     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
2320 
2321     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
2322     if (!skip_lapack) {
2323       PetscScalar temp_work;
2324 
2325 #if defined(PETSC_MISSING_LAPACK_GESVD)
2326       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
2327       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
2328       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
2329       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
2330 #if defined(PETSC_USE_COMPLEX)
2331       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
2332 #endif
2333       /* now we evaluate the optimal workspace using query with lwork=-1 */
2334       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2335       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
2336       lwork = -1;
2337       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2338 #if !defined(PETSC_USE_COMPLEX)
2339       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
2340 #else
2341       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
2342 #endif
2343       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2344       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
2345 #else /* on missing GESVD */
2346       /* SVD */
2347       PetscInt max_n,min_n;
2348       max_n = max_size_of_constraint;
2349       min_n = max_constraints;
2350       if (max_size_of_constraint < max_constraints) {
2351         min_n = max_size_of_constraint;
2352         max_n = max_constraints;
2353       }
2354       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
2355 #if defined(PETSC_USE_COMPLEX)
2356       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
2357 #endif
2358       /* now we evaluate the optimal workspace using query with lwork=-1 */
2359       lwork = -1;
2360       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
2361       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
2362       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
2363       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2364 #if !defined(PETSC_USE_COMPLEX)
2365       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
2366 #else
2367       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
2368 #endif
2369       ierr = PetscFPTrapPop();CHKERRQ(ierr);
2370       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
2371 #endif /* on missing GESVD */
2372       /* Allocate optimal workspace */
2373       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
2374       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
2375     }
2376     /* Now we can loop on constraining sets */
2377     total_counts = 0;
2378     constraints_idxs_ptr[0] = 0;
2379     constraints_data_ptr[0] = 0;
2380     /* vertices */
2381     if (n_vertices) {
2382       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2383       if (nnsp_has_cnst) { /* it considers all possible vertices */
2384         ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2385         for (i=0;i<n_vertices;i++) {
2386           constraints_n[total_counts] = 1;
2387           constraints_data[total_counts] = 1.0;
2388           constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2389           constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2390           total_counts++;
2391         }
2392       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2393         PetscBool used_vertex;
2394         for (i=0;i<n_vertices;i++) {
2395           used_vertex = PETSC_FALSE;
2396           k = 0;
2397           while (!used_vertex && k<nnsp_size) {
2398             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2399             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2400               constraints_n[total_counts] = 1;
2401               constraints_idxs[total_counts] = is_indices[i];
2402               constraints_data[total_counts] = 1.0;
2403               constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
2404               constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
2405               total_counts++;
2406               used_vertex = PETSC_TRUE;
2407             }
2408             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2409             k++;
2410           }
2411         }
2412       }
2413       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2414       n_vertices = total_counts;
2415     }
2416 
2417     /* edges and faces */
2418     total_counts_cc = total_counts;
2419     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2420       IS        used_is;
2421       PetscBool idxs_copied = PETSC_FALSE;
2422 
2423       if (ncc<n_ISForEdges) {
2424         used_is = ISForEdges[ncc];
2425         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2426       } else {
2427         used_is = ISForFaces[ncc-n_ISForEdges];
2428         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2429       }
2430       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2431 
2432       ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr);
2433       ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2434       /* change of basis should not be performed on local periodic nodes */
2435       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2436       if (nnsp_has_cnst) {
2437         PetscScalar quad_value;
2438 
2439         ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2440         idxs_copied = PETSC_TRUE;
2441 
2442         if (!pcbddc->use_nnsp_true) {
2443           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2444         } else {
2445           quad_value = 1.0;
2446         }
2447         for (j=0;j<size_of_constraint;j++) {
2448           constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
2449         }
2450         temp_constraints++;
2451         total_counts++;
2452       }
2453       for (k=0;k<nnsp_size;k++) {
2454         PetscReal real_value;
2455         PetscScalar *ptr_to_data;
2456 
2457         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2458         ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
2459         for (j=0;j<size_of_constraint;j++) {
2460           ptr_to_data[j] = array[is_indices[j]];
2461         }
2462         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2463         /* check if array is null on the connected component */
2464         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2465         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
2466         if (real_value > 0.0) { /* keep indices and values */
2467           temp_constraints++;
2468           total_counts++;
2469           if (!idxs_copied) {
2470             ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2471             idxs_copied = PETSC_TRUE;
2472           }
2473         }
2474       }
2475       ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2476       valid_constraints = temp_constraints;
2477       if (!pcbddc->use_nnsp_true && temp_constraints) {
2478         if (temp_constraints == 1) { /* just normalize the constraint */
2479           PetscScalar norm,*ptr_to_data;
2480 
2481           ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2482           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2483           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
2484           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2485           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
2486         } else { /* perform SVD */
2487           PetscReal   tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2488           PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
2489 
2490 #if defined(PETSC_MISSING_LAPACK_GESVD)
2491           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2492              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2493              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2494                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2495                 from that computed using LAPACKgesvd
2496              -> This is due to a different computation of eigenvectors in LAPACKheev
2497              -> The quality of the POD-computed basis will be the same */
2498           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2499           /* Store upper triangular part of correlation matrix */
2500           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2501           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2502           for (j=0;j<temp_constraints;j++) {
2503             for (k=0;k<j+1;k++) {
2504               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
2505             }
2506           }
2507           /* compute eigenvalues and eigenvectors of correlation matrix */
2508           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2509           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2510 #if !defined(PETSC_USE_COMPLEX)
2511           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2512 #else
2513           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2514 #endif
2515           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2516           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2517           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2518           j = 0;
2519           while (j < temp_constraints && singular_vals[j] < tol) j++;
2520           total_counts = total_counts-j;
2521           valid_constraints = temp_constraints-j;
2522           /* scale and copy POD basis into used quadrature memory */
2523           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2524           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2525           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2526           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2527           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2528           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2529           if (j<temp_constraints) {
2530             PetscInt ii;
2531             for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
2532             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2533             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2534             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2535             for (k=0;k<temp_constraints-j;k++) {
2536               for (ii=0;ii<size_of_constraint;ii++) {
2537                 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2538               }
2539             }
2540           }
2541 #else  /* on missing GESVD */
2542           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2543           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2544           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2545           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2546 #if !defined(PETSC_USE_COMPLEX)
2547           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2548 #else
2549           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2550 #endif
2551           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2552           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2553           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2554           k = temp_constraints;
2555           if (k > size_of_constraint) k = size_of_constraint;
2556           j = 0;
2557           while (j < k && singular_vals[k-j-1] < tol) j++;
2558           valid_constraints = k-j;
2559           total_counts = total_counts-temp_constraints+valid_constraints;
2560 #endif /* on missing GESVD */
2561         }
2562       }
2563       /* update pointers information */
2564       if (valid_constraints) {
2565         constraints_n[total_counts_cc] = valid_constraints;
2566         constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
2567         constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
2568         /* set change_of_basis flag */
2569         if (boolforchange) {
2570           PetscBTSet(change_basis,total_counts_cc);
2571         }
2572         total_counts_cc++;
2573       }
2574     }
2575     /* free workspace */
2576     if (!skip_lapack) {
2577       ierr = PetscFree(work);CHKERRQ(ierr);
2578 #if defined(PETSC_USE_COMPLEX)
2579       ierr = PetscFree(rwork);CHKERRQ(ierr);
2580 #endif
2581       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2582 #if defined(PETSC_MISSING_LAPACK_GESVD)
2583       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2584       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2585 #endif
2586     }
2587     for (k=0;k<nnsp_size;k++) {
2588       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2589     }
2590     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2591     /* free index sets of faces, edges and vertices */
2592     for (i=0;i<n_ISForFaces;i++) {
2593       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2594     }
2595     if (n_ISForFaces) {
2596       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2597     }
2598     for (i=0;i<n_ISForEdges;i++) {
2599       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2600     }
2601     if (n_ISForEdges) {
2602       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2603     }
2604     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2605   } else {
2606     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2607 
2608     total_counts = 0;
2609     n_vertices = 0;
2610     if (sub_schurs->is_vertices && pcbddc->use_vertices) {
2611       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
2612     }
2613     max_constraints = 0;
2614     total_counts_cc = 0;
2615     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2616       total_counts += pcbddc->adaptive_constraints_n[i];
2617       if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
2618       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2619     }
2620     constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
2621     constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
2622     constraints_idxs = pcbddc->adaptive_constraints_idxs;
2623     constraints_data = pcbddc->adaptive_constraints_data;
2624     /* constraints_n differs from pcbddc->adaptive_constraints_n */
2625     ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr);
2626     total_counts_cc = 0;
2627     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2628       if (pcbddc->adaptive_constraints_n[i]) {
2629         constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
2630       }
2631     }
2632 #if 0
2633     printf("Found %d totals (%d)\n",total_counts_cc,total_counts);
2634     for (i=0;i<total_counts_cc;i++) {
2635       printf("const %d, start %d",i,constraints_idxs_ptr[i]);
2636       printf(" end %d:\n",constraints_idxs_ptr[i+1]);
2637       for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) {
2638         printf(" %d",constraints_idxs[j]);
2639       }
2640       printf("\n");
2641       printf("number of cc: %d\n",constraints_n[i]);
2642     }
2643     for (i=0;i<n_vertices;i++) {
2644       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2645     }
2646     for (i=0;i<sub_schurs->n_subs;i++) {
2647       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2648     }
2649 #endif
2650 
2651     max_size_of_constraint = 0;
2652     for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
2653     ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr);
2654     /* Change of basis */
2655     ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr);
2656     if (pcbddc->use_change_of_basis) {
2657       for (i=0;i<sub_schurs->n_subs;i++) {
2658         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2659           ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr);
2660         }
2661       }
2662     }
2663   }
2664   pcbddc->local_primal_size = total_counts;
2665   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2666   ierr = PetscMalloc1(pcbddc->local_primal_size+1,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2667 
2668   /* map constraints_idxs in boundary numbering */
2669   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr);
2670   if (i != constraints_idxs_ptr[total_counts_cc]) {
2671     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",constraints_idxs_ptr[total_counts_cc],i);
2672   }
2673 
2674   /* Create constraint matrix */
2675   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2676   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2677   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2678 
2679   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2680   /* determine if a QR strategy is needed for change of basis */
2681   qr_needed = PETSC_FALSE;
2682   ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr);
2683   total_primal_vertices=0;
2684   pcbddc->local_primal_size_cc = 0;
2685   for (i=0;i<total_counts_cc;i++) {
2686     size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2687     if (size_of_constraint == 1) {
2688       pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
2689       pcbddc->local_primal_size_cc += 1;
2690     } else if (PetscBTLookup(change_basis,i)) {
2691       for (k=0;k<constraints_n[i];k++) {
2692         pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2693       }
2694       pcbddc->local_primal_size_cc += constraints_n[i];
2695       if (constraints_n[i] > 1 || pcbddc->use_qr_single || pcbddc->faster_deluxe) {
2696         PetscBTSet(qr_needed_idx,i);
2697         qr_needed = PETSC_TRUE;
2698       }
2699     } else {
2700       pcbddc->local_primal_size_cc += 1;
2701     }
2702   }
2703   /* note that the local variable n_vertices used below stores the number of pointwise constraints */
2704   pcbddc->n_vertices = total_primal_vertices;
2705   /* permute indices in order to have a sorted set of vertices */
2706   ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2707 
2708   /* allocating one extra space (in case an extra primal dof should be stored for the benign trick */
2709   ierr = PetscMalloc2(pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+1,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr);
2710   ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2711   for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
2712 
2713   /* nonzero structure of constraint matrix */
2714   /* and get reference dof for local constraints */
2715   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2716   for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
2717 
2718   j = total_primal_vertices;
2719   total_counts = total_primal_vertices;
2720   cum = total_primal_vertices;
2721   for (i=n_vertices;i<total_counts_cc;i++) {
2722     if (!PetscBTLookup(change_basis,i)) {
2723       pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
2724       pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
2725       cum++;
2726       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2727       for (k=0;k<constraints_n[i];k++) {
2728         pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
2729         nnz[j+k] = size_of_constraint;
2730       }
2731       j += constraints_n[i];
2732     }
2733   }
2734   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2735   ierr = PetscFree(nnz);CHKERRQ(ierr);
2736 
2737   /* set values in constraint matrix */
2738   for (i=0;i<total_primal_vertices;i++) {
2739     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2740   }
2741   total_counts = total_primal_vertices;
2742   for (i=n_vertices;i<total_counts_cc;i++) {
2743     if (!PetscBTLookup(change_basis,i)) {
2744       PetscInt *cols;
2745 
2746       size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2747       cols = constraints_idxs+constraints_idxs_ptr[i];
2748       for (k=0;k<constraints_n[i];k++) {
2749         PetscInt    row = total_counts+k;
2750         PetscScalar *vals;
2751 
2752         vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
2753         ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2754       }
2755       total_counts += constraints_n[i];
2756     }
2757   }
2758   /* assembling */
2759   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761 
2762   /*
2763   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2764   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2765   */
2766   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2767   if (pcbddc->use_change_of_basis) {
2768     /* dual and primal dofs on a single cc */
2769     PetscInt     dual_dofs,primal_dofs;
2770     /* working stuff for GEQRF */
2771     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2772     PetscBLASInt lqr_work;
2773     /* working stuff for UNGQR */
2774     PetscScalar  *gqr_work,lgqr_work_t;
2775     PetscBLASInt lgqr_work;
2776     /* working stuff for TRTRS */
2777     PetscScalar  *trs_rhs;
2778     PetscBLASInt Blas_NRHS;
2779     /* pointers for values insertion into change of basis matrix */
2780     PetscInt     *start_rows,*start_cols;
2781     PetscScalar  *start_vals;
2782     /* working stuff for values insertion */
2783     PetscBT      is_primal;
2784     PetscInt     *aux_primal_numbering_B;
2785     /* matrix sizes */
2786     PetscInt     global_size,local_size;
2787     /* temporary change of basis */
2788     Mat          localChangeOfBasisMatrix;
2789     /* extra space for debugging */
2790     PetscScalar  *dbg_work;
2791 
2792     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2793     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2794     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2795     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2796     /* nonzeros for local mat */
2797     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2798     for (i=0;i<pcis->n;i++) nnz[i]=1;
2799     for (i=n_vertices;i<total_counts_cc;i++) {
2800       if (PetscBTLookup(change_basis,i)) {
2801         size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
2802         if (PetscBTLookup(qr_needed_idx,i)) {
2803           for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
2804         } else {
2805           nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
2806           for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
2807         }
2808       }
2809     }
2810     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2811     ierr = PetscFree(nnz);CHKERRQ(ierr);
2812     /* Set initial identity in the matrix */
2813     for (i=0;i<pcis->n;i++) {
2814       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2815     }
2816 
2817     if (pcbddc->dbg_flag) {
2818       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2819       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2820     }
2821 
2822 
2823     /* Now we loop on the constraints which need a change of basis */
2824     /*
2825        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2826        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2827 
2828        Basic blocks of change of basis matrix T computed by
2829 
2830           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2831 
2832             | 1        0   ...        0         s_1/S |
2833             | 0        1   ...        0         s_2/S |
2834             |              ...                        |
2835             | 0        ...            1     s_{n-1}/S |
2836             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2837 
2838             with S = \sum_{i=1}^n s_i^2
2839             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2840                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2841 
2842           - QR decomposition of constraints otherwise
2843     */
2844     if (qr_needed) {
2845       /* space to store Q */
2846       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2847       /* first we issue queries for optimal work */
2848       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2849       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2850       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2851       lqr_work = -1;
2852       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2853       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2854       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2855       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2856       lgqr_work = -1;
2857       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2858       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2859       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2860       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2861       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2862       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2863       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2864       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2865       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2866       /* array to store scaling factors for reflectors */
2867       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2868       /* array to store rhs and solution of triangular solver */
2869       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2870       /* allocating workspace for check */
2871       if (pcbddc->dbg_flag) {
2872         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2873       }
2874     }
2875     /* array to store whether a node is primal or not */
2876     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2877     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2878     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2879     if (i != total_primal_vertices) {
2880       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2881     }
2882     for (i=0;i<total_primal_vertices;i++) {
2883       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2884     }
2885     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2886 
2887     /* loop on constraints and see whether or not they need a change of basis and compute it */
2888     for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
2889       size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
2890       if (PetscBTLookup(change_basis,total_counts)) {
2891         /* get constraint info */
2892         primal_dofs = constraints_n[total_counts];
2893         dual_dofs = size_of_constraint-primal_dofs;
2894 
2895         if (pcbddc->dbg_flag) {
2896           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr);
2897         }
2898 
2899         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2900 
2901           /* copy quadrature constraints for change of basis check */
2902           if (pcbddc->dbg_flag) {
2903             ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2904           }
2905           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2906           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2907 
2908           /* compute QR decomposition of constraints */
2909           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2910           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2911           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2912           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2913           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2914           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2915           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2916 
2917           /* explictly compute R^-T */
2918           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2919           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2920           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2921           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2922           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2923           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2924           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2925           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2926           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2927           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2928 
2929           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2930           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2931           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2932           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2933           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2934           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2935           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2936           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2937           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2938 
2939           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2940              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2941              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2942           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2943           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2944           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2945           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2946           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2947           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2948           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2949           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
2950           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2951           ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2952 
2953           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2954           start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
2955           /* insert cols for primal dofs */
2956           for (j=0;j<primal_dofs;j++) {
2957             start_vals = &qr_basis[j*size_of_constraint];
2958             start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
2959             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2960           }
2961           /* insert cols for dual dofs */
2962           for (j=0,k=0;j<dual_dofs;k++) {
2963             if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
2964               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2965               start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
2966               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2967               j++;
2968             }
2969           }
2970 
2971           /* check change of basis */
2972           if (pcbddc->dbg_flag) {
2973             PetscInt   ii,jj;
2974             PetscBool valid_qr=PETSC_TRUE;
2975             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2976             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2977             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2978             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2979             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2980             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2981             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2982             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2983             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2984             for (jj=0;jj<size_of_constraint;jj++) {
2985               for (ii=0;ii<primal_dofs;ii++) {
2986                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2987                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2988               }
2989             }
2990             if (!valid_qr) {
2991               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2992               for (jj=0;jj<size_of_constraint;jj++) {
2993                 for (ii=0;ii<primal_dofs;ii++) {
2994                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2995                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2996                   }
2997                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2998                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2999                   }
3000                 }
3001               }
3002             } else {
3003               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
3004             }
3005           }
3006         } else { /* simple transformation block */
3007           PetscInt    row,col;
3008           PetscScalar val,norm;
3009 
3010           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
3011           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
3012           for (j=0;j<size_of_constraint;j++) {
3013             PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
3014             row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
3015             if (!PetscBTLookup(is_primal,row_B)) {
3016               col = constraints_idxs[constraints_idxs_ptr[total_counts]];
3017               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
3018               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
3019             } else {
3020               for (k=0;k<size_of_constraint;k++) {
3021                 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
3022                 if (row != col) {
3023                   val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
3024                 } else {
3025                   val = constraints_data[constraints_data_ptr[total_counts]]/norm;
3026                 }
3027                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
3028               }
3029             }
3030           }
3031           if (pcbddc->dbg_flag) {
3032             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
3033           }
3034         }
3035       } else {
3036         if (pcbddc->dbg_flag) {
3037           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr);
3038         }
3039       }
3040     }
3041 
3042     /* free workspace */
3043     if (qr_needed) {
3044       if (pcbddc->dbg_flag) {
3045         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
3046       }
3047       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
3048       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
3049       ierr = PetscFree(qr_work);CHKERRQ(ierr);
3050       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
3051       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
3052     }
3053     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
3054     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056 
3057     /* assembling of global change of variable */
3058     {
3059       Mat      tmat;
3060       PetscInt bs;
3061 
3062       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3063       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3064       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
3065       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
3066       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3067       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
3068       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
3069       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
3070       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3071       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
3072       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3073       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3074       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3075       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
3076       ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3077       ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3078       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
3079       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
3080     }
3081     /* check */
3082     if (pcbddc->dbg_flag) {
3083       PetscReal error;
3084       Vec       x,x_change;
3085 
3086       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
3087       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
3088       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
3089       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
3090       ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3091       ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3092       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
3093       ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3094       ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3095       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
3096       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
3097       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
3098       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3099       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
3100       ierr = VecDestroy(&x);CHKERRQ(ierr);
3101       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
3102     }
3103 
3104     /* adapt sub_schurs computed (if any) */
3105     if (pcbddc->use_deluxe_scaling) {
3106       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
3107       if (sub_schurs->S_Ej_all) {
3108         Mat S_new,tmat;
3109         IS is_all_N;
3110 
3111         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
3112         ierr = MatGetSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
3113         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
3114         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3115         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
3116         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3117         sub_schurs->S_Ej_all = S_new;
3118         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3119         if (sub_schurs->sum_S_Ej_all) {
3120           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
3121           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
3122           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
3123           sub_schurs->sum_S_Ej_all = S_new;
3124           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
3125         }
3126         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
3127       }
3128     }
3129     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
3130   } else if (pcbddc->user_ChangeOfBasisMatrix) {
3131     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3132     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
3133   }
3134 
3135   /* set up change of basis context */
3136   if (pcbddc->ChangeOfBasisMatrix) {
3137     PCBDDCChange_ctx change_ctx;
3138 
3139     if (!pcbddc->new_global_mat) {
3140       PetscInt global_size,local_size;
3141 
3142       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
3143       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
3144       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
3145       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
3146       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
3147       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
3148       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
3149       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
3150       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
3151     } else {
3152       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
3153       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
3154       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
3155     }
3156     if (!pcbddc->user_ChangeOfBasisMatrix) {
3157       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
3158       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
3159     } else {
3160       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
3161       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
3162     }
3163     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
3164     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
3165     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3167   }
3168 
3169   /* add pressure dof to set of primal nodes for numbering purposes */
3170   if (pcbddc->zerodiag) {
3171     const PetscInt *idxs;
3172     PetscInt       nz;
3173 
3174     ierr = ISGetLocalSize(pcbddc->zerodiag,&nz);CHKERRQ(ierr);
3175     ierr = ISGetIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3176     pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = idxs[nz-1];
3177     pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size_cc] = idxs[nz-1];
3178     ierr = ISRestoreIndices(pcbddc->zerodiag,&idxs);CHKERRQ(ierr);
3179     pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
3180     pcbddc->local_primal_size_cc++;
3181     pcbddc->local_primal_size++;
3182   }
3183 
3184   /* check if a new primal space has been introduced (also take into account benign trick) */
3185   pcbddc->new_primal_space_local = PETSC_TRUE;
3186   if (olocal_primal_size == pcbddc->local_primal_size) {
3187     ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3188     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3189     if (!pcbddc->new_primal_space_local) {
3190       ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscScalar),&pcbddc->new_primal_space_local);CHKERRQ(ierr);
3191       pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
3192     }
3193   }
3194   ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr);
3195   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
3196   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3197 
3198   /* flush dbg viewer */
3199   if (pcbddc->dbg_flag) {
3200     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3201   }
3202 
3203   /* free workspace */
3204   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
3205   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
3206   if (!pcbddc->adaptive_selection) {
3207     ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr);
3208     ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr);
3209   } else {
3210     ierr = PetscFree5(pcbddc->adaptive_constraints_n,
3211                       pcbddc->adaptive_constraints_idxs_ptr,
3212                       pcbddc->adaptive_constraints_data_ptr,
3213                       pcbddc->adaptive_constraints_idxs,
3214                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
3215     ierr = PetscFree(constraints_n);CHKERRQ(ierr);
3216     ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr);
3217   }
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "PCBDDCAnalyzeInterface"
3223 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
3224 {
3225   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
3226   PC_IS       *pcis = (PC_IS*)pc->data;
3227   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
3228   PetscInt    ierr,i,vertex_size,N;
3229   PetscViewer viewer=pcbddc->dbg_viewer;
3230 
3231   PetscFunctionBegin;
3232   /* Reset previously computed graph */
3233   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
3234   /* Init local Graph struct */
3235   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
3236   ierr = PCBDDCGraphInit(pcbddc->mat_graph,pcis->mapping,N);CHKERRQ(ierr);
3237 
3238   /* Check validity of the csr graph passed in by the user */
3239   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
3240     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
3241   }
3242 
3243   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
3244   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
3245     PetscInt  *xadj,*adjncy;
3246     PetscInt  nvtxs;
3247     PetscBool flg_row=PETSC_FALSE;
3248 
3249     if (pcbddc->use_local_adj) {
3250 
3251       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3252       if (flg_row) {
3253         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
3254         pcbddc->computed_rowadj = PETSC_TRUE;
3255       }
3256       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3257     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
3258       IS                     is_dummy;
3259       ISLocalToGlobalMapping l2gmap_dummy;
3260       PetscInt               j,sum;
3261       PetscInt               *cxadj,*cadjncy;
3262       const PetscInt         *idxs;
3263       PCBDDCGraph            graph;
3264       PetscBT                is_on_boundary;
3265 
3266       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
3267       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
3268       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
3269       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
3270       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
3271       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
3272       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3273       if (flg_row) {
3274         graph->xadj = xadj;
3275         graph->adjncy = adjncy;
3276       }
3277       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
3278       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
3279       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
3280 
3281       if (pcbddc->dbg_flag) {
3282         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
3283         for (i=0;i<graph->ncc;i++) {
3284           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
3285         }
3286       }
3287 
3288       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
3289       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3290       for (i=0;i<pcis->n_B;i++) {
3291         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
3292       }
3293       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
3294 
3295       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
3296       sum = 0;
3297       for (i=0;i<graph->ncc;i++) {
3298         PetscInt sizecc = 0;
3299         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3300           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3301             sizecc++;
3302           }
3303         }
3304         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3305           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3306             cxadj[graph->queue[j]] = sizecc;
3307           }
3308         }
3309         sum += sizecc*sizecc;
3310       }
3311       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
3312       sum = 0;
3313       for (i=0;i<pcis->n;i++) {
3314         PetscInt temp = cxadj[i];
3315         cxadj[i] = sum;
3316         sum += temp;
3317       }
3318       cxadj[pcis->n] = sum;
3319       for (i=0;i<graph->ncc;i++) {
3320         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
3321           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
3322             PetscInt k,sizecc = 0;
3323             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
3324               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
3325                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
3326                 sizecc++;
3327               }
3328             }
3329           }
3330         }
3331       }
3332       if (sum) {
3333         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
3334       } else {
3335         ierr = PetscFree(cxadj);CHKERRQ(ierr);
3336         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
3337       }
3338       graph->xadj = 0;
3339       graph->adjncy = 0;
3340       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
3341       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
3342     }
3343   }
3344   if (pcbddc->dbg_flag) {
3345     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3346   }
3347 
3348   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3349   vertex_size = 1;
3350   if (pcbddc->user_provided_isfordofs) {
3351     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3352       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3353       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3354         ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3355         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3356       }
3357       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3358       pcbddc->n_ISForDofs = 0;
3359       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3360     }
3361     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3362     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3363   } else {
3364     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3365       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3366       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3367       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3368         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3369       }
3370     }
3371   }
3372 
3373   /* Setup of Graph */
3374   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3375     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3376   }
3377   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3378     ierr = PCBDDCGlobalToLocal(matis->rctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3379   }
3380   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3381 
3382   /* Graph's connected components analysis */
3383   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3384 
3385   /* print some info to stdout */
3386   if (pcbddc->dbg_flag) {
3387     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3388   }
3389 
3390   /* mark topography has done */
3391   pcbddc->recompute_topography = PETSC_FALSE;
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 /* given an index sets possibly with holes, renumbers the indexes removing the holes */
3396 #undef __FUNCT__
3397 #define __FUNCT__ "PCBDDCSubsetNumbering"
3398 PetscErrorCode PCBDDCSubsetNumbering(IS subset, IS subset_mult, PetscInt *N_n, IS *subset_n)
3399 {
3400   PetscSF        sf;
3401   PetscLayout    map;
3402   const PetscInt *idxs;
3403   PetscInt       *leaf_data,*root_data,*gidxs;
3404   PetscInt       N,n,i,lbounds[2],gbounds[2],Nl;
3405   PetscInt       n_n,nlocals,start,first_index;
3406   PetscMPIInt    commsize;
3407   PetscBool      first_found;
3408   PetscErrorCode ierr;
3409 
3410   PetscFunctionBegin;
3411   ierr = ISGetLocalSize(subset,&n);CHKERRQ(ierr);
3412   if (subset_mult) {
3413     PetscCheckSameComm(subset,1,subset_mult,2);
3414     ierr = ISGetLocalSize(subset,&i);CHKERRQ(ierr);
3415     if (i != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Local subset and multiplicity sizes don't match! %d != %d",n,i);
3416   }
3417   /* create workspace layout for computing global indices of subset */
3418   ierr = ISGetIndices(subset,&idxs);CHKERRQ(ierr);
3419   lbounds[0] = lbounds[1] = 0;
3420   for (i=0;i<n;i++) {
3421     if (idxs[i] < lbounds[0]) lbounds[0] = idxs[i];
3422     else if (idxs[i] > lbounds[1]) lbounds[1] = idxs[i];
3423   }
3424   lbounds[0] = -lbounds[0];
3425   ierr = MPI_Allreduce(lbounds,gbounds,2,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3426   gbounds[0] = -gbounds[0];
3427   N = gbounds[1] - gbounds[0] + 1;
3428   ierr = PetscLayoutCreate(PetscObjectComm((PetscObject)subset),&map);CHKERRQ(ierr);
3429   ierr = PetscLayoutSetBlockSize(map,1);CHKERRQ(ierr);
3430   ierr = PetscLayoutSetSize(map,N);CHKERRQ(ierr);
3431   ierr = PetscLayoutSetUp(map);CHKERRQ(ierr);
3432   ierr = PetscLayoutGetLocalSize(map,&Nl);CHKERRQ(ierr);
3433 
3434   /* create sf : leaf_data == multiplicity of indexes, root data == global index in layout */
3435   ierr = PetscMalloc2(n,&leaf_data,Nl,&root_data);CHKERRQ(ierr);
3436   if (subset_mult) {
3437     const PetscInt* idxs_mult;
3438 
3439     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3440     ierr = PetscMemcpy(leaf_data,idxs_mult,n*sizeof(PetscInt));CHKERRQ(ierr);
3441     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3442   } else {
3443     for (i=0;i<n;i++) leaf_data[i] = 1;
3444   }
3445   /* local size of new subset */
3446   n_n = 0;
3447   for (i=0;i<n;i++) n_n += leaf_data[i];
3448 
3449   /* global indexes in layout */
3450   ierr = PetscMalloc1(n_n,&gidxs);CHKERRQ(ierr); /* allocating possibly extra space in gidxs which will be used later */
3451   for (i=0;i<n;i++) gidxs[i] = idxs[i] - gbounds[0];
3452   ierr = ISRestoreIndices(subset,&idxs);CHKERRQ(ierr);
3453   ierr = PetscSFCreate(PetscObjectComm((PetscObject)subset),&sf);CHKERRQ(ierr);
3454   ierr = PetscSFSetGraphLayout(sf,map,n,NULL,PETSC_COPY_VALUES,gidxs);CHKERRQ(ierr);
3455   ierr = PetscLayoutDestroy(&map);CHKERRQ(ierr);
3456 
3457   /* reduce from leaves to roots */
3458   ierr = PetscMemzero(root_data,Nl*sizeof(PetscInt));CHKERRQ(ierr);
3459   ierr = PetscSFReduceBegin(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3460   ierr = PetscSFReduceEnd(sf,MPIU_INT,leaf_data,root_data,MPI_MAX);CHKERRQ(ierr);
3461 
3462   /* count indexes in local part of layout */
3463   nlocals = 0;
3464   first_index = -1;
3465   first_found = PETSC_FALSE;
3466   for (i=0;i<Nl;i++) {
3467     if (!first_found && root_data[i]) {
3468       first_found = PETSC_TRUE;
3469       first_index = i;
3470     }
3471     nlocals += root_data[i];
3472   }
3473 
3474   /* cumulative of number of indexes and size of subset without holes */
3475 #if defined(PETSC_HAVE_MPI_EXSCAN)
3476   start = 0;
3477   ierr = MPI_Exscan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3478 #else
3479   ierr = MPI_Scan(&nlocals,&start,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3480   start = start-nlocals;
3481 #endif
3482 
3483   if (N_n) { /* compute total size of new subset if requested */
3484     *N_n = start + nlocals;
3485     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)subset),&commsize);CHKERRQ(ierr);
3486     ierr = MPI_Bcast(N_n,1,MPIU_INT,commsize-1,PetscObjectComm((PetscObject)subset));CHKERRQ(ierr);
3487   }
3488 
3489   /* adapt root data with cumulative */
3490   if (first_found) {
3491     PetscInt old_index;
3492 
3493     root_data[first_index] += start;
3494     old_index = first_index;
3495     for (i=first_index+1;i<Nl;i++) {
3496       if (root_data[i]) {
3497         root_data[i] += root_data[old_index];
3498         old_index = i;
3499       }
3500     }
3501   }
3502 
3503   /* from roots to leaves */
3504   ierr = PetscSFBcastBegin(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3505   ierr = PetscSFBcastEnd(sf,MPIU_INT,root_data,leaf_data);CHKERRQ(ierr);
3506   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
3507 
3508   /* create new IS with global indexes without holes */
3509   if (subset_mult) {
3510     const PetscInt* idxs_mult;
3511     PetscInt        cum;
3512 
3513     cum = 0;
3514     ierr = ISGetIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3515     for (i=0;i<n;i++) {
3516       PetscInt j;
3517       for (j=0;j<idxs_mult[i];j++) gidxs[cum++] = leaf_data[i] - idxs_mult[i] + j;
3518     }
3519     ierr = ISRestoreIndices(subset_mult,&idxs_mult);CHKERRQ(ierr);
3520   } else {
3521     for (i=0;i<n;i++) {
3522       gidxs[i] = leaf_data[i]-1;
3523     }
3524   }
3525   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)subset),n_n,gidxs,PETSC_OWN_POINTER,subset_n);CHKERRQ(ierr);
3526   ierr = PetscFree2(leaf_data,root_data);CHKERRQ(ierr);
3527   PetscFunctionReturn(0);
3528 }
3529 
3530 #undef __FUNCT__
3531 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3532 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3533 {
3534   PetscInt       i,j;
3535   PetscScalar    *alphas;
3536   PetscErrorCode ierr;
3537 
3538   PetscFunctionBegin;
3539   /* this implements stabilized Gram-Schmidt */
3540   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3541   for (i=0;i<n;i++) {
3542     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3543     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3544     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3545   }
3546   ierr = PetscFree(alphas);CHKERRQ(ierr);
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatISGetSubassemblingPattern"
3552 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscInt redprocs, IS* is_sends)
3553 {
3554   IS             ranks_send_to;
3555   PetscInt       n_neighs,*neighs,*n_shared,**shared;
3556   PetscMPIInt    size,rank,color;
3557   PetscInt       *xadj,*adjncy;
3558   PetscInt       *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
3559   PetscInt       i,local_size,threshold=0;
3560   PetscBool      use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3561   PetscSubcomm   subcomm;
3562   PetscErrorCode ierr;
3563 
3564   PetscFunctionBegin;
3565   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3566   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3567   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3568 
3569   /* Get info on mapping */
3570   ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&local_size);CHKERRQ(ierr);
3571   ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3572 
3573   /* build local CSR graph of subdomains' connectivity */
3574   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3575   xadj[0] = 0;
3576   xadj[1] = PetscMax(n_neighs-1,0);
3577   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3578   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3579 
3580   if (threshold) {
3581     PetscInt xadj_count = 0;
3582     for (i=1;i<n_neighs;i++) {
3583       if (n_shared[i] > threshold) {
3584         adjncy[xadj_count] = neighs[i];
3585         adjncy_wgt[xadj_count] = n_shared[i];
3586         xadj_count++;
3587       }
3588     }
3589     xadj[1] = xadj_count;
3590   } else {
3591     if (xadj[1]) {
3592       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3593       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3594     }
3595   }
3596   ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3597   if (use_square) {
3598     for (i=0;i<xadj[1];i++) {
3599       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3600     }
3601   }
3602   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3603 
3604   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3605 
3606   /*
3607     Restrict work on active processes only.
3608   */
3609   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3610   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3611   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3612   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3613   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3614   if (color) {
3615     ierr = PetscFree(xadj);CHKERRQ(ierr);
3616     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3617     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3618   } else {
3619     Mat             subdomain_adj;
3620     IS              new_ranks,new_ranks_contig;
3621     MatPartitioning partitioner;
3622     PetscInt        prank,rstart=0,rend=0;
3623     PetscInt        *is_indices,*oldranks;
3624     PetscBool       aggregate;
3625 
3626     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3627     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3628     prank = rank;
3629     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3630     /*
3631     for (i=0;i<size;i++) {
3632       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3633     }
3634     */
3635     for (i=0;i<xadj[1];i++) {
3636       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3637     }
3638     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3639     aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
3640     if (aggregate) {
3641       PetscInt    lrows,row,ncols,*cols;
3642       PetscMPIInt nrank;
3643       PetscScalar *vals;
3644 
3645       ierr = MPI_Comm_rank(PetscSubcommChild(subcomm),&nrank);CHKERRQ(ierr);
3646       lrows = 0;
3647       if (nrank<redprocs) {
3648         lrows = size/redprocs;
3649         if (nrank<size%redprocs) lrows++;
3650       }
3651       ierr = MatCreateAIJ(PetscSubcommChild(subcomm),lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr);
3652       ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr);
3653       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3654       ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
3655       row = nrank;
3656       ncols = xadj[1]-xadj[0];
3657       cols = adjncy;
3658       ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr);
3659       for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
3660       ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3661       ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662       ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3663       ierr = PetscFree(xadj);CHKERRQ(ierr);
3664       ierr = PetscFree(adjncy);CHKERRQ(ierr);
3665       ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3666       ierr = PetscFree(vals);CHKERRQ(ierr);
3667     } else {
3668       ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3669     }
3670     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3671 
3672     /* Partition */
3673     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3674     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3675     if (use_vwgt) {
3676       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3677       v_wgt[0] = local_size;
3678       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3679     }
3680     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3681     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3682     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3683     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3684     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3685 
3686     /* renumber new_ranks to avoid "holes" in new set of processors */
3687     ierr = PCBDDCSubsetNumbering(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr);
3688     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3689     ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3690     if (!redprocs) {
3691       ranks_send_to_idx[0] = oldranks[is_indices[0]];
3692     } else {
3693       PetscInt    idxs[1];
3694       PetscMPIInt tag;
3695       MPI_Request *reqs;
3696 
3697       ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr);
3698       ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr);
3699       for (i=rstart;i<rend;i++) {
3700         ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,PetscSubcommChild(subcomm),&reqs[i-rstart]);CHKERRQ(ierr);
3701       }
3702       ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,PetscSubcommChild(subcomm),MPI_STATUS_IGNORE);CHKERRQ(ierr);
3703       ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3704       ierr = PetscFree(reqs);CHKERRQ(ierr);
3705       ranks_send_to_idx[0] = oldranks[idxs[0]];
3706     }
3707     ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3708     /* clean up */
3709     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3710     ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr);
3711     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3712     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3713   }
3714   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3715 
3716   /* assemble parallel IS for sends */
3717   i = 1;
3718   if (color) i=0;
3719   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3720   /* get back IS */
3721   *is_sends = ranks_send_to;
3722   PetscFunctionReturn(0);
3723 }
3724 
3725 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3726 
3727 #undef __FUNCT__
3728 #define __FUNCT__ "MatISSubassemble"
3729 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3730 {
3731   Mat                    local_mat;
3732   IS                     is_sends_internal;
3733   PetscInt               rows,cols,new_local_rows;
3734   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3735   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3736   ISLocalToGlobalMapping l2gmap;
3737   PetscInt*              l2gmap_indices;
3738   const PetscInt*        is_indices;
3739   MatType                new_local_type;
3740   /* buffers */
3741   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3742   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3743   PetscInt               *recv_buffer_idxs_local;
3744   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3745   /* MPI */
3746   MPI_Comm               comm,comm_n;
3747   PetscSubcomm           subcomm;
3748   PetscMPIInt            n_sends,n_recvs,commsize;
3749   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3750   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3751   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3752   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3753   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3754   PetscErrorCode         ierr;
3755 
3756   PetscFunctionBegin;
3757   /* TODO: add missing checks */
3758   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3759   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3760   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3761   PetscValidLogicalCollectiveInt(mat,nis,7);
3762   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3763   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3764   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3765   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3766   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3767   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3768   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3769   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3770     PetscInt mrows,mcols,mnrows,mncols;
3771     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3772     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3773     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3774     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3775     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3776     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3777   }
3778   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3779   PetscValidLogicalCollectiveInt(mat,bs,0);
3780   /* prepare IS for sending if not provided */
3781   if (!is_sends) {
3782     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3783     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,0,&is_sends_internal);CHKERRQ(ierr);
3784   } else {
3785     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3786     is_sends_internal = is_sends;
3787   }
3788 
3789   /* get comm */
3790   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3791 
3792   /* compute number of sends */
3793   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3794   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3795 
3796   /* compute number of receives */
3797   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3798   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3799   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3800   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3801   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3802   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3803   ierr = PetscFree(iflags);CHKERRQ(ierr);
3804 
3805   /* restrict comm if requested */
3806   subcomm = 0;
3807   destroy_mat = PETSC_FALSE;
3808   if (restrict_comm) {
3809     PetscMPIInt color,subcommsize;
3810 
3811     color = 0;
3812     if (restrict_full) {
3813       if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */
3814     } else {
3815       if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */
3816     }
3817     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3818     subcommsize = commsize - subcommsize;
3819     /* check if reuse has been requested */
3820     if (reuse == MAT_REUSE_MATRIX) {
3821       if (*mat_n) {
3822         PetscMPIInt subcommsize2;
3823         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3824         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3825         comm_n = PetscObjectComm((PetscObject)*mat_n);
3826       } else {
3827         comm_n = PETSC_COMM_SELF;
3828       }
3829     } else { /* MAT_INITIAL_MATRIX */
3830       PetscMPIInt rank;
3831 
3832       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3833       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3834       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3835       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3836       comm_n = PetscSubcommChild(subcomm);
3837     }
3838     /* flag to destroy *mat_n if not significative */
3839     if (color) destroy_mat = PETSC_TRUE;
3840   } else {
3841     comm_n = comm;
3842   }
3843 
3844   /* prepare send/receive buffers */
3845   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3846   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3847   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3848   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3849   if (nis) {
3850     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3851   }
3852 
3853   /* Get data from local matrices */
3854   if (!isdense) {
3855     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3856     /* TODO: See below some guidelines on how to prepare the local buffers */
3857     /*
3858        send_buffer_vals should contain the raw values of the local matrix
3859        send_buffer_idxs should contain:
3860        - MatType_PRIVATE type
3861        - PetscInt        size_of_l2gmap
3862        - PetscInt        global_row_indices[size_of_l2gmap]
3863        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3864     */
3865   } else {
3866     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3867     ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr);
3868     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3869     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3870     send_buffer_idxs[1] = i;
3871     ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3872     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3873     ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3874     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3875     for (i=0;i<n_sends;i++) {
3876       ilengths_vals[is_indices[i]] = len*len;
3877       ilengths_idxs[is_indices[i]] = len+2;
3878     }
3879   }
3880   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3881   /* additional is (if any) */
3882   if (nis) {
3883     PetscMPIInt psum;
3884     PetscInt j;
3885     for (j=0,psum=0;j<nis;j++) {
3886       PetscInt plen;
3887       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3888       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3889       psum += len+1; /* indices + lenght */
3890     }
3891     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3892     for (j=0,psum=0;j<nis;j++) {
3893       PetscInt plen;
3894       const PetscInt *is_array_idxs;
3895       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3896       send_buffer_idxs_is[psum] = plen;
3897       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3898       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3899       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3900       psum += plen+1; /* indices + lenght */
3901     }
3902     for (i=0;i<n_sends;i++) {
3903       ilengths_idxs_is[is_indices[i]] = psum;
3904     }
3905     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3906   }
3907 
3908   buf_size_idxs = 0;
3909   buf_size_vals = 0;
3910   buf_size_idxs_is = 0;
3911   for (i=0;i<n_recvs;i++) {
3912     buf_size_idxs += (PetscInt)olengths_idxs[i];
3913     buf_size_vals += (PetscInt)olengths_vals[i];
3914     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3915   }
3916   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3917   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3918   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3919 
3920   /* get new tags for clean communications */
3921   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3922   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3923   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3924 
3925   /* allocate for requests */
3926   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3927   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3928   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3929   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3930   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3931   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3932 
3933   /* communications */
3934   ptr_idxs = recv_buffer_idxs;
3935   ptr_vals = recv_buffer_vals;
3936   ptr_idxs_is = recv_buffer_idxs_is;
3937   for (i=0;i<n_recvs;i++) {
3938     source_dest = onodes[i];
3939     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3940     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3941     ptr_idxs += olengths_idxs[i];
3942     ptr_vals += olengths_vals[i];
3943     if (nis) {
3944       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3945       ptr_idxs_is += olengths_idxs_is[i];
3946     }
3947   }
3948   for (i=0;i<n_sends;i++) {
3949     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3950     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3951     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3952     if (nis) {
3953       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3954     }
3955   }
3956   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3957   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3958 
3959   /* assemble new l2g map */
3960   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3961   ptr_idxs = recv_buffer_idxs;
3962   new_local_rows = 0;
3963   for (i=0;i<n_recvs;i++) {
3964     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3965     ptr_idxs += olengths_idxs[i];
3966   }
3967   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3968   ptr_idxs = recv_buffer_idxs;
3969   new_local_rows = 0;
3970   for (i=0;i<n_recvs;i++) {
3971     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3972     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3973     ptr_idxs += olengths_idxs[i];
3974   }
3975   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3976   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3977   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3978 
3979   /* infer new local matrix type from received local matrices type */
3980   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3981   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3982   if (n_recvs) {
3983     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3984     ptr_idxs = recv_buffer_idxs;
3985     for (i=0;i<n_recvs;i++) {
3986       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3987         new_local_type_private = MATAIJ_PRIVATE;
3988         break;
3989       }
3990       ptr_idxs += olengths_idxs[i];
3991     }
3992     switch (new_local_type_private) {
3993       case MATDENSE_PRIVATE:
3994         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3995           new_local_type = MATSEQAIJ;
3996           bs = 1;
3997         } else { /* if I receive only 1 dense matrix */
3998           new_local_type = MATSEQDENSE;
3999           bs = 1;
4000         }
4001         break;
4002       case MATAIJ_PRIVATE:
4003         new_local_type = MATSEQAIJ;
4004         bs = 1;
4005         break;
4006       case MATBAIJ_PRIVATE:
4007         new_local_type = MATSEQBAIJ;
4008         break;
4009       case MATSBAIJ_PRIVATE:
4010         new_local_type = MATSEQSBAIJ;
4011         break;
4012       default:
4013         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
4014         break;
4015     }
4016   } else { /* by default, new_local_type is seqdense */
4017     new_local_type = MATSEQDENSE;
4018     bs = 1;
4019   }
4020 
4021   /* create MATIS object if needed */
4022   if (reuse == MAT_INITIAL_MATRIX) {
4023     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
4024     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr);
4025   } else {
4026     /* it also destroys the local matrices */
4027     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
4028   }
4029   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
4030   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
4031 
4032   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4033 
4034   /* Global to local map of received indices */
4035   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
4036   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
4037   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
4038 
4039   /* restore attributes -> type of incoming data and its size */
4040   buf_size_idxs = 0;
4041   for (i=0;i<n_recvs;i++) {
4042     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
4043     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
4044     buf_size_idxs += (PetscInt)olengths_idxs[i];
4045   }
4046   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
4047 
4048   /* set preallocation */
4049   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
4050   if (!newisdense) {
4051     PetscInt *new_local_nnz=0;
4052 
4053     ptr_vals = recv_buffer_vals;
4054     ptr_idxs = recv_buffer_idxs_local;
4055     if (n_recvs) {
4056       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
4057     }
4058     for (i=0;i<n_recvs;i++) {
4059       PetscInt j;
4060       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
4061         for (j=0;j<*(ptr_idxs+1);j++) {
4062           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
4063         }
4064       } else {
4065         /* TODO */
4066       }
4067       ptr_idxs += olengths_idxs[i];
4068     }
4069     if (new_local_nnz) {
4070       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
4071       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
4072       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
4073       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4074       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
4075       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
4076     } else {
4077       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4078     }
4079     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
4080   } else {
4081     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
4082   }
4083 
4084   /* set values */
4085   ptr_vals = recv_buffer_vals;
4086   ptr_idxs = recv_buffer_idxs_local;
4087   for (i=0;i<n_recvs;i++) {
4088     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
4089       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4090       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
4091       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4092       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
4093       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4094     } else {
4095       /* TODO */
4096     }
4097     ptr_idxs += olengths_idxs[i];
4098     ptr_vals += olengths_vals[i];
4099   }
4100   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4101   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4102   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4103   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4104   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
4105   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
4106 
4107 #if 0
4108   if (!restrict_comm) { /* check */
4109     Vec       lvec,rvec;
4110     PetscReal infty_error;
4111 
4112     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
4113     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
4114     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
4115     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
4116     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
4117     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4118     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
4119     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
4120     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
4121   }
4122 #endif
4123 
4124   /* assemble new additional is (if any) */
4125   if (nis) {
4126     PetscInt **temp_idxs,*count_is,j,psum;
4127 
4128     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4129     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
4130     ptr_idxs = recv_buffer_idxs_is;
4131     psum = 0;
4132     for (i=0;i<n_recvs;i++) {
4133       for (j=0;j<nis;j++) {
4134         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4135         count_is[j] += plen; /* increment counting of buffer for j-th IS */
4136         psum += plen;
4137         ptr_idxs += plen+1; /* shift pointer to received data */
4138       }
4139     }
4140     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
4141     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
4142     for (i=1;i<nis;i++) {
4143       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
4144     }
4145     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
4146     ptr_idxs = recv_buffer_idxs_is;
4147     for (i=0;i<n_recvs;i++) {
4148       for (j=0;j<nis;j++) {
4149         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
4150         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
4151         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
4152         ptr_idxs += plen+1; /* shift pointer to received data */
4153       }
4154     }
4155     for (i=0;i<nis;i++) {
4156       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4157       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
4158       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4159     }
4160     ierr = PetscFree(count_is);CHKERRQ(ierr);
4161     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
4162     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
4163   }
4164   /* free workspace */
4165   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
4166   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4167   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
4168   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4169   if (isdense) {
4170     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
4171     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
4172   } else {
4173     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
4174   }
4175   if (nis) {
4176     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
4177     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
4178   }
4179   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
4180   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
4181   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
4182   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
4183   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
4184   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
4185   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
4186   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
4187   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
4188   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
4189   ierr = PetscFree(onodes);CHKERRQ(ierr);
4190   if (nis) {
4191     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
4192     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
4193     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
4194   }
4195   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
4196   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
4197     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
4198     for (i=0;i<nis;i++) {
4199       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4200     }
4201     *mat_n = NULL;
4202   }
4203   PetscFunctionReturn(0);
4204 }
4205 
4206 /* temporary hack into ksp private data structure */
4207 #include <petsc/private/kspimpl.h>
4208 
4209 #undef __FUNCT__
4210 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
4211 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
4212 {
4213   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
4214   PC_IS                  *pcis = (PC_IS*)pc->data;
4215   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
4216   MatNullSpace           CoarseNullSpace=NULL;
4217   ISLocalToGlobalMapping coarse_islg;
4218   IS                     coarse_is,*isarray;
4219   PetscInt               i,im_active=-1,active_procs=-1;
4220   PetscInt               nis,nisdofs,nisneu;
4221   PC                     pc_temp;
4222   PCType                 coarse_pc_type;
4223   KSPType                coarse_ksp_type;
4224   PetscBool              multilevel_requested,multilevel_allowed;
4225   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
4226   Mat                    t_coarse_mat_is;
4227   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
4228   PetscMPIInt            all_procs;
4229   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
4230   PetscBool              compute_vecs = PETSC_FALSE;
4231   PetscScalar            *array;
4232   PetscErrorCode         ierr;
4233 
4234   PetscFunctionBegin;
4235   /* Assign global numbering to coarse dofs */
4236   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
4237     PetscInt ocoarse_size;
4238     compute_vecs = PETSC_TRUE;
4239     ocoarse_size = pcbddc->coarse_size;
4240     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
4241     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
4242     /* see if we can avoid some work */
4243     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
4244       /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
4245       if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
4246         PC        pc;
4247         PetscBool isbddc;
4248 
4249         /* temporary workaround since PCBDDC does not have a reset method so far */
4250         ierr = KSPGetPC(pcbddc->coarse_ksp,&pc);CHKERRQ(ierr);
4251         ierr = PetscObjectTypeCompare((PetscObject)pc,PCBDDC,&isbddc);CHKERRQ(ierr);
4252         if (isbddc) {
4253           ierr = PCDestroy(&pc);CHKERRQ(ierr);
4254         }
4255         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
4256         coarse_reuse = PETSC_FALSE;
4257       } else { /* we can safely reuse already computed coarse matrix */
4258         coarse_reuse = PETSC_TRUE;
4259       }
4260     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
4261       coarse_reuse = PETSC_FALSE;
4262     }
4263     /* reset any subassembling information */
4264     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4265     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4266   } else { /* primal space is unchanged, so we can reuse coarse matrix */
4267     coarse_reuse = PETSC_TRUE;
4268   }
4269 
4270   /* count "active" (i.e. with positive local size) and "void" processes */
4271   im_active = !!(pcis->n);
4272   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4273   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
4274   void_procs = all_procs-active_procs;
4275   csin_type_simple = PETSC_TRUE;
4276   redist = PETSC_FALSE;
4277   if (pcbddc->current_level && void_procs) {
4278     csin_ml = PETSC_TRUE;
4279     ncoarse_ml = void_procs;
4280     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
4281     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
4282       csin_ds = PETSC_TRUE;
4283       ncoarse_ds = pcbddc->redistribute_coarse;
4284       redist = PETSC_TRUE;
4285     } else {
4286       csin_ds = PETSC_TRUE;
4287       ncoarse_ds = active_procs;
4288       redist = PETSC_TRUE;
4289     }
4290   } else {
4291     csin_ml = PETSC_FALSE;
4292     ncoarse_ml = all_procs;
4293     if (void_procs) {
4294       csin_ds = PETSC_TRUE;
4295       ncoarse_ds = void_procs;
4296       csin_type_simple = PETSC_FALSE;
4297     } else {
4298       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
4299         csin_ds = PETSC_TRUE;
4300         ncoarse_ds = pcbddc->redistribute_coarse;
4301         redist = PETSC_TRUE;
4302       } else {
4303         csin_ds = PETSC_FALSE;
4304         ncoarse_ds = all_procs;
4305       }
4306     }
4307   }
4308 
4309   /*
4310     test if we can go multilevel: three conditions must be satisfied:
4311     - we have not exceeded the number of levels requested
4312     - we can actually subassemble the active processes
4313     - we can find a suitable number of MPI processes where we can place the subassembled problem
4314   */
4315   multilevel_allowed = PETSC_FALSE;
4316   multilevel_requested = PETSC_FALSE;
4317   if (pcbddc->current_level < pcbddc->max_levels) {
4318     multilevel_requested = PETSC_TRUE;
4319     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4320       multilevel_allowed = PETSC_FALSE;
4321     } else {
4322       multilevel_allowed = PETSC_TRUE;
4323     }
4324   }
4325   /* determine number of process partecipating to coarse solver */
4326   if (multilevel_allowed) {
4327     ncoarse = ncoarse_ml;
4328     csin = csin_ml;
4329     redist = PETSC_FALSE;
4330   } else {
4331     ncoarse = ncoarse_ds;
4332     csin = csin_ds;
4333   }
4334 
4335   /* creates temporary l2gmap and IS for coarse indexes */
4336   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4337   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4338 
4339   /* creates temporary MATIS object for coarse matrix */
4340   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4341   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4342   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4343   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4344 #if 0
4345   {
4346     PetscViewer viewer;
4347     char filename[256];
4348     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4349     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4350     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4351     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4352     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4353   }
4354 #endif
4355   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr);
4356   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4357   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4359   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4360 
4361   /* compute dofs splitting and neumann boundaries for coarse dofs */
4362   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4363     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4364     const PetscInt         *idxs;
4365     ISLocalToGlobalMapping tmap;
4366 
4367     /* create map between primal indices (in local representative ordering) and local primal numbering */
4368     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4369     /* allocate space for temporary storage */
4370     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4371     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4372     /* allocate for IS array */
4373     nisdofs = pcbddc->n_ISForDofsLocal;
4374     nisneu = !!pcbddc->NeumannBoundariesLocal;
4375     nis = nisdofs + nisneu;
4376     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4377     /* dofs splitting */
4378     for (i=0;i<nisdofs;i++) {
4379       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4380       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4381       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4382       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4383       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4384       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4385       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4386       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4387     }
4388     /* neumann boundaries */
4389     if (pcbddc->NeumannBoundariesLocal) {
4390       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4391       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4392       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4393       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4394       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4395       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4396       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4397       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4398     }
4399     /* free memory */
4400     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4401     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4402     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4403   } else {
4404     nis = 0;
4405     nisdofs = 0;
4406     nisneu = 0;
4407     isarray = NULL;
4408   }
4409   /* destroy no longer needed map */
4410   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4411 
4412   /* restrict on coarse candidates (if needed) */
4413   coarse_mat_is = NULL;
4414   if (csin) {
4415     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4416       if (redist) {
4417         PetscMPIInt rank;
4418         PetscInt    spc,n_spc_p1,dest[1],destsize;
4419 
4420         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4421         spc = active_procs/ncoarse;
4422         n_spc_p1 = active_procs%ncoarse;
4423         if (im_active) {
4424           destsize = 1;
4425           if (rank > n_spc_p1*(spc+1)-1) {
4426             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4427           } else {
4428             dest[0] = rank/(spc+1);
4429           }
4430         } else {
4431           destsize = 0;
4432         }
4433         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4434       } else if (csin_type_simple) {
4435         PetscMPIInt rank;
4436         PetscInt    issize,isidx;
4437 
4438         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4439         if (im_active) {
4440           issize = 1;
4441           isidx = (PetscInt)rank;
4442         } else {
4443           issize = 0;
4444           isidx = -1;
4445         }
4446         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4447       } else { /* get a suitable subassembling pattern from MATIS code */
4448         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4449       }
4450 
4451       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4452       if (!redist || ncoarse <= void_procs) {
4453         PetscInt ncoarse_cand,tissize,*nisindices;
4454         PetscInt *coarse_candidates;
4455         const PetscInt* tisindices;
4456 
4457         /* get coarse candidates' ranks in pc communicator */
4458         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4459         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4460         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4461           if (!coarse_candidates[i]) {
4462             coarse_candidates[ncoarse_cand++]=i;
4463           }
4464         }
4465         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4466 
4467 
4468         if (pcbddc->dbg_flag) {
4469           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4470           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4471           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4472           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4473           for (i=0;i<ncoarse_cand;i++) {
4474             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4475           }
4476           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4477           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4478         }
4479         /* shift the pattern on coarse candidates */
4480         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4481         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4482         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4483         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4484         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4485         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4486         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4487       }
4488       if (pcbddc->dbg_flag) {
4489         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4490         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4491         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4492         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4493       }
4494     }
4495     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4496     if (multilevel_allowed) { /* we need to keep tracking of void processes for future placements */
4497       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_FALSE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4498     } else { /* this is the last level, so use just receiving processes in subcomm */
4499       ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4500     }
4501   } else {
4502     if (pcbddc->dbg_flag) {
4503       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4504       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4505       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4506     }
4507     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4508     coarse_mat_is = t_coarse_mat_is;
4509   }
4510 
4511   /* create local to global scatters for coarse problem */
4512   if (compute_vecs) {
4513     PetscInt lrows;
4514     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4515     if (coarse_mat_is) {
4516       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4517     } else {
4518       lrows = 0;
4519     }
4520     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4521     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4522     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4523     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4524     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4525   }
4526   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4527   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4528 
4529   /* set defaults for coarse KSP and PC */
4530   if (multilevel_allowed) {
4531     coarse_ksp_type = KSPRICHARDSON;
4532     coarse_pc_type = PCBDDC;
4533   } else {
4534     coarse_ksp_type = KSPPREONLY;
4535     coarse_pc_type = PCREDUNDANT;
4536   }
4537 
4538   /* print some info if requested */
4539   if (pcbddc->dbg_flag) {
4540     if (!multilevel_allowed) {
4541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4542       if (multilevel_requested) {
4543         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4544       } else if (pcbddc->max_levels) {
4545         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4546       }
4547       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4548     }
4549   }
4550 
4551   /* create the coarse KSP object only once with defaults */
4552   if (coarse_mat_is) {
4553     MatReuse coarse_mat_reuse;
4554     PetscViewer dbg_viewer = NULL;
4555     if (pcbddc->dbg_flag) {
4556       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4557       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4558     }
4559     if (!pcbddc->coarse_ksp) {
4560       char prefix[256],str_level[16];
4561       size_t len;
4562       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4563       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4564       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4565       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4566       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4567       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4568       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4569       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4570       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4571       /* prefix */
4572       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4573       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4574       if (!pcbddc->current_level) {
4575         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4576         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4577       } else {
4578         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4579         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4580         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4581         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4582         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4583         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4584       }
4585       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4586       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4587       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4588       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4589       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4590       /* allow user customization */
4591       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4592     }
4593     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4594     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4595     if (nisdofs) {
4596       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4597       for (i=0;i<nisdofs;i++) {
4598         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4599       }
4600     }
4601     if (nisneu) {
4602       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4603       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4604     }
4605 
4606     /* get some info after set from options */
4607     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4608     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4609     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4610     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4611       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4612       isbddc = PETSC_FALSE;
4613     }
4614     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4615     if (isredundant) {
4616       KSP inner_ksp;
4617       PC  inner_pc;
4618       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4619       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4620       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4621     }
4622 
4623     /* assemble coarse matrix */
4624     if (coarse_reuse) {
4625       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4626       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4627       coarse_mat_reuse = MAT_REUSE_MATRIX;
4628     } else {
4629       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4630     }
4631     if (isbddc || isnn) {
4632       if (pcbddc->coarsening_ratio > 1) {
4633         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4634           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4635           if (pcbddc->dbg_flag) {
4636             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4637             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4638             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4639             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4640           }
4641         }
4642         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4643       } else {
4644         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4645         coarse_mat = coarse_mat_is;
4646       }
4647     } else {
4648       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4649     }
4650     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4651 
4652     /* propagate symmetry info of coarse matrix */
4653     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4654     if (pc->pmat->symmetric_set) {
4655       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr);
4656     }
4657     if (pc->pmat->hermitian_set) {
4658       ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr);
4659     }
4660     if (pc->pmat->spd_set) {
4661       ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr);
4662     }
4663     /* set operators */
4664     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4665     if (pcbddc->dbg_flag) {
4666       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4667     }
4668   } else { /* processes non partecipating to coarse solver (if any) */
4669     coarse_mat = 0;
4670   }
4671   ierr = PetscFree(isarray);CHKERRQ(ierr);
4672 #if 0
4673   {
4674     PetscViewer viewer;
4675     char filename[256];
4676     sprintf(filename,"coarse_mat.m");
4677     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4678     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4679     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4680     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4681   }
4682 #endif
4683 
4684   /* Compute coarse null space (special handling by BDDC only) */
4685 #if 0
4686   if (pcbddc->NullSpace) {
4687     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4688   }
4689 #endif
4690 
4691   if (pcbddc->coarse_ksp) {
4692     Vec crhs,csol;
4693     PetscBool ispreonly;
4694 
4695     if (CoarseNullSpace) {
4696       if (isbddc) {
4697         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4698       } else {
4699         ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr);
4700       }
4701     }
4702     /* setup coarse ksp */
4703     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4704     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4705     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4706     /* hack */
4707     if (!csol) {
4708       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4709     }
4710     if (!crhs) {
4711       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4712     }
4713     /* Check coarse problem if in debug mode or if solving with an iterative method */
4714     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4715     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4716       KSP       check_ksp;
4717       KSPType   check_ksp_type;
4718       PC        check_pc;
4719       Vec       check_vec,coarse_vec;
4720       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4721       PetscInt  its;
4722       PetscBool compute_eigs;
4723       PetscReal *eigs_r,*eigs_c;
4724       PetscInt  neigs;
4725       const char *prefix;
4726 
4727       /* Create ksp object suitable for estimation of extreme eigenvalues */
4728       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4729       ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr);
4730       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4731       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4732       if (ispreonly) {
4733         check_ksp_type = KSPPREONLY;
4734         compute_eigs = PETSC_FALSE;
4735       } else {
4736         check_ksp_type = KSPGMRES;
4737         compute_eigs = PETSC_TRUE;
4738       }
4739       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4740       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4741       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4742       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4743       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4744       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4745       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4746       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4747       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4748       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4749       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4750       /* create random vec */
4751       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4752       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4753       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4754       if (CoarseNullSpace) {
4755         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4756       }
4757       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4758       /* solve coarse problem */
4759       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4760       if (CoarseNullSpace) {
4761         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4762       }
4763       /* set eigenvalue estimation if preonly has not been requested */
4764       if (compute_eigs) {
4765         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4766         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4767         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4768         lambda_max = eigs_r[neigs-1];
4769         lambda_min = eigs_r[0];
4770         if (pcbddc->use_coarse_estimates) {
4771           if (lambda_max>lambda_min) {
4772             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4773             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4774           }
4775         }
4776       }
4777 
4778       /* check coarse problem residual error */
4779       if (pcbddc->dbg_flag) {
4780         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4781         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4782         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4783         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4784         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4785         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4786         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4787         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4788         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4789         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4790         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4791         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4792         if (compute_eigs) {
4793           PetscReal lambda_max_s,lambda_min_s;
4794           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4795           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4796           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4797           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4798           for (i=0;i<neigs;i++) {
4799             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4800           }
4801         }
4802         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4803         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4804       }
4805       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4806       if (compute_eigs) {
4807         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4808         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4809       }
4810     }
4811   }
4812   /* print additional info */
4813   if (pcbddc->dbg_flag) {
4814     /* waits until all processes reaches this point */
4815     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4816     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4817     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4818   }
4819 
4820   /* free memory */
4821   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4822   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825 
4826 #undef __FUNCT__
4827 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4828 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4829 {
4830   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4831   PC_IS*         pcis = (PC_IS*)pc->data;
4832   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4833   IS             subset,subset_mult,subset_n;
4834   PetscInt       local_size,coarse_size=0;
4835   PetscInt       *local_primal_indices=NULL;
4836   const PetscInt *t_local_primal_indices;
4837   PetscErrorCode ierr;
4838 
4839   PetscFunctionBegin;
4840   /* Compute global number of coarse dofs */
4841   if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) {
4842     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
4843   }
4844   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr);
4845   ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr);
4846   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4847   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr);
4848   ierr = PCBDDCSubsetNumbering(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr);
4849   ierr = ISDestroy(&subset);CHKERRQ(ierr);
4850   ierr = ISDestroy(&subset_mult);CHKERRQ(ierr);
4851   ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr);
4852   if (local_size != pcbddc->local_primal_size) {
4853     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %d != %d",local_size,pcbddc->local_primal_size);
4854   }
4855   ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr);
4856   ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4857   ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr);
4858   ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr);
4859   ierr = ISDestroy(&subset_n);CHKERRQ(ierr);
4860 
4861   /* check numbering */
4862   if (pcbddc->dbg_flag) {
4863     PetscScalar coarsesum,*array,*array2;
4864     PetscInt    i;
4865     PetscBool   set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4866 
4867     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4868     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4869     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4870     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4871     /* counter */
4872     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4873     ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
4874     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4875     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4876     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4877     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4878 
4879     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4880     for (i=0;i<pcbddc->local_primal_size;i++) {
4881       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4882     }
4883     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4884     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4885     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4886     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4887     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4888     ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4889     ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4890     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4891     ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4892     for (i=0;i<pcis->n;i++) {
4893       if (array[i] != 0.0 && array[i] != array2[i]) {
4894         PetscInt owned = (PetscInt)(array[i]);
4895         PetscInt neigh = (PetscInt)(array2[i]);
4896         set_error = PETSC_TRUE;
4897         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a %d processes instead of %d!\n",PetscGlobalRank,i,owned,neigh);CHKERRQ(ierr);
4898       }
4899     }
4900     ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr);
4901     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4902     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4903     for (i=0;i<pcis->n;i++) {
4904       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4905     }
4906     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4907     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4908     ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4909     ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4910     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4911     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4912     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4913       PetscInt *gidxs;
4914 
4915       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4916       ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4917       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4918       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4919       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4920       for (i=0;i<pcbddc->local_primal_size;i++) {
4921         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4922       }
4923       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4924       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4925     }
4926     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4927     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4928   }
4929   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4930   /* get back data */
4931   *coarse_size_n = coarse_size;
4932   *local_primal_indices_n = local_primal_indices;
4933   PetscFunctionReturn(0);
4934 }
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "PCBDDCGlobalToLocal"
4938 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4939 {
4940   IS             localis_t;
4941   PetscInt       i,lsize,*idxs,n;
4942   PetscScalar    *vals;
4943   PetscErrorCode ierr;
4944 
4945   PetscFunctionBegin;
4946   /* get indices in local ordering exploiting local to global map */
4947   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4948   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4949   for (i=0;i<lsize;i++) vals[i] = 1.0;
4950   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4951   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4952   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4953   if (idxs) { /* multilevel guard */
4954     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4955   }
4956   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4957   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4958   ierr = PetscFree(vals);CHKERRQ(ierr);
4959   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4960   /* now compute set in local ordering */
4961   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4962   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4963   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4964   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4965   for (i=0,lsize=0;i<n;i++) {
4966     if (PetscRealPart(vals[i]) > 0.5) {
4967       lsize++;
4968     }
4969   }
4970   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4971   for (i=0,lsize=0;i<n;i++) {
4972     if (PetscRealPart(vals[i]) > 0.5) {
4973       idxs[lsize++] = i;
4974     }
4975   }
4976   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4977   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4978   *localis = localis_t;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4983 #undef __FUNCT__
4984 #define __FUNCT__ "PCBDDCMatMult_Private"
4985 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4986 {
4987   PCBDDCChange_ctx change_ctx;
4988   PetscErrorCode   ierr;
4989 
4990   PetscFunctionBegin;
4991   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4992   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4993   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4994   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4995   PetscFunctionReturn(0);
4996 }
4997 
4998 #undef __FUNCT__
4999 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
5000 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
5001 {
5002   PCBDDCChange_ctx change_ctx;
5003   PetscErrorCode   ierr;
5004 
5005   PetscFunctionBegin;
5006   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
5007   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
5008   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
5009   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
5010   PetscFunctionReturn(0);
5011 }
5012 
5013 #undef __FUNCT__
5014 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
5015 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
5016 {
5017   PC_IS               *pcis=(PC_IS*)pc->data;
5018   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5019   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5020   Mat                 S_j;
5021   PetscInt            *used_xadj,*used_adjncy;
5022   PetscBool           free_used_adj;
5023   PetscErrorCode      ierr;
5024 
5025   PetscFunctionBegin;
5026   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
5027   free_used_adj = PETSC_FALSE;
5028   if (pcbddc->sub_schurs_layers == -1) {
5029     used_xadj = NULL;
5030     used_adjncy = NULL;
5031   } else {
5032     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
5033       used_xadj = pcbddc->mat_graph->xadj;
5034       used_adjncy = pcbddc->mat_graph->adjncy;
5035     } else if (pcbddc->computed_rowadj) {
5036       used_xadj = pcbddc->mat_graph->xadj;
5037       used_adjncy = pcbddc->mat_graph->adjncy;
5038     } else {
5039       PetscBool      flg_row=PETSC_FALSE;
5040       const PetscInt *xadj,*adjncy;
5041       PetscInt       nvtxs;
5042 
5043       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5044       if (flg_row) {
5045         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
5046         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
5047         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
5048         free_used_adj = PETSC_TRUE;
5049       } else {
5050         pcbddc->sub_schurs_layers = -1;
5051         used_xadj = NULL;
5052         used_adjncy = NULL;
5053       }
5054       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
5055     }
5056   }
5057 
5058   /* setup sub_schurs data */
5059   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
5060   if (!sub_schurs->use_mumps) {
5061     /* pcbddc->ksp_D up to date only if not using MUMPS */
5062     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
5063     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE);CHKERRQ(ierr);
5064   } else {
5065     PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
5066     PetscBool isseqaij;
5067     if (!pcbddc->use_vertices && reuse_solvers) {
5068       PetscInt n_vertices;
5069 
5070       ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr);
5071       reuse_solvers = (PetscBool)!n_vertices;
5072     }
5073     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
5074     if (!isseqaij) {
5075       Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5076       if (matis->A == pcbddc->local_mat) {
5077         ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
5078         ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5079       } else {
5080         ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_REUSE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr);
5081       }
5082     }
5083     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point);CHKERRQ(ierr);
5084   }
5085   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
5086 
5087   /* free adjacency */
5088   if (free_used_adj) {
5089     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
5090   }
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "PCBDDCInitSubSchurs"
5096 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
5097 {
5098   PC_IS               *pcis=(PC_IS*)pc->data;
5099   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
5100   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
5101   PCBDDCGraph         graph;
5102   PetscErrorCode      ierr;
5103 
5104   PetscFunctionBegin;
5105   /* attach interface graph for determining subsets */
5106   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
5107     IS       verticesIS,verticescomm;
5108     PetscInt vsize,*idxs;
5109 
5110     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
5111     ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr);
5112     ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5113     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr);
5114     ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr);
5115     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
5116     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
5117     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
5118     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr);
5119     ierr = ISDestroy(&verticescomm);CHKERRQ(ierr);
5120     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
5121 /*
5122     if (pcbddc->dbg_flag) {
5123       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
5124     }
5125 */
5126   } else {
5127     graph = pcbddc->mat_graph;
5128   }
5129 
5130   /* sub_schurs init */
5131   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
5132 
5133   /* free graph struct */
5134   if (pcbddc->sub_schurs_rebuild) {
5135     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
5136   }
5137   PetscFunctionReturn(0);
5138 }
5139