xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision bbb9e6c6d664ca28960acb626686b45e2eac2b03)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       n = PetscMax(n,nmin);
66       cum += subset_size*n;
67       cum2 += n;
68       maxneigs = PetscMax(maxneigs,n);
69     }
70   }
71 
72   if (mss) {
73     if (symmetric) {
74       PetscBLASInt B_itype = 1;
75       PetscBLASInt B_N = mss;
76       PetscScalar  zero = 0.0;
77       PetscScalar  eps = 0.0; /* dlamch? */
78 
79       B_lwork = -1;
80       S = NULL;
81       St = NULL;
82       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
83 #if defined(PETSC_USE_COMPLEX)
84       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
85 #else
86       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
87 #endif
88       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
89       ierr = PetscFPTrapPop();CHKERRQ(ierr);
90     } else {
91         /* TODO */
92     }
93   } else {
94     lwork = 0;
95   }
96 
97   nv = 0;
98   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
99     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
100   }
101   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
102   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
103                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
104 #if defined(PETSC_USE_COMPLEX)
105   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
106 #endif
107   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
108   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
109                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
110                       nv+cum,&pcbddc->adaptive_constraints_idxs,
111                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
112   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
113 
114   maxneigs = 0;
115   cum = cum2 = cumarray = 0;
116   if (sub_schurs->is_Ej_com) {
117     const PetscInt *idxs;
118 
119     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
120     for (cum=0;cum<nv;cum++) {
121       pcbddc->adaptive_constraints_n[cum] = 1;
122       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
123       pcbddc->adaptive_constraints_ptrs[cum] = cum;
124       pcbddc->adaptive_constraints_data[cum] = 1.0;
125     }
126     cum2 = cum;
127     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
128   }
129 
130   if (mss) { /* multilevel */
131     if (pcbddc->use_deluxe_scaling) {
132       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
133     } else {
134       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
135     }
136     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
137   }
138 
139   cumsubs = 0;
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (symmetric) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (symmetric) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172 
173         /* ask for eigenvalues lower than thresh */
174         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
175         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
176         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
177 #if defined(PETSC_USE_COMPLEX)
178         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
179 #else
180         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
181 #endif
182         ierr = PetscFPTrapPop();CHKERRQ(ierr);
183         if (B_ierr) {
184           if (B_ierr < 0 ) {
185             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
186           } else if (B_ierr <= B_N) {
187             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
188           } else {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
190           }
191         }
192 
193         if (B_neigs > nmax) {
194           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax);
195           B_neigs = nmax;
196         }
197 
198         if (B_neigs < nmin) {
199           PetscBLASInt B_neigs2;
200 
201           B_IL = B_neigs + 1;
202           ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr);
203           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
204           if (symmetric) {
205             PetscInt j;
206             for (j=0;j<subset_size;j++) {
207               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
208             }
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212           } else {
213             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
215           }
216           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
217 #if defined(PETSC_USE_COMPLEX)
218           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
219 #else
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
221 #endif
222           ierr = PetscFPTrapPop();CHKERRQ(ierr);
223           B_neigs += B_neigs2;
224         }
225         if (B_ierr) {
226           if (B_ierr < 0 ) {
227             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
228           } else if (B_ierr <= B_N) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
230           } else {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
232           }
233         }
234         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
235         for (j=0;j<B_neigs;j++) {
236           if (eigs[j] == 0.0) {
237             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
238           } else {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
240           }
241         }
242       } else {
243           /* TODO */
244       }
245       maxneigs = PetscMax(B_neigs,maxneigs);
246       pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs;
247       cumsubs++;
248 
249       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
250       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
251       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
253       for (j=0;j<B_neigs;j++) {
254 #if 0
255         {
256           PetscBLASInt Blas_N,Blas_one = 1.0;
257           PetscScalar norm;
258           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
259           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
260           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
261             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
262           } else {
263             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
264           }
265           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
266         }
267 #endif
268         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
269         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
270         cum2 += subset_size;
271       }
272       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
273     }
274     /* shift for next computation */
275     cumarray += subset_size*subset_size;
276   }
277   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
278   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
279 
280   if (mss) {
281     if (pcbddc->use_deluxe_scaling) {
282       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
283     } else {
284       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
285     }
286     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
287   }
288   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
289 #if defined(PETSC_USE_COMPLEX)
290   ierr = PetscFree(rwork);CHKERRQ(ierr);
291 #endif
292   if (pcbddc->dbg_flag) {
293     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
294     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "PCBDDCSetUpSolvers"
301 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
302 {
303   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
304   PetscScalar    *coarse_submat_vals;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   /* Setup local scatters R_to_B and (optionally) R_to_D */
309   /* PCBDDCSetUpLocalWorkVectors should be called first! */
310   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
311 
312   /* Setup local neumann solver ksp_R */
313   /* PCBDDCSetUpLocalScatters should be called first! */
314   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
315 
316   /* Change global null space passed in by the user if change of basis has been requested */
317   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
318     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
319   }
320 
321   /*
322      Setup local correction and local part of coarse basis.
323      Gives back the dense local part of the coarse matrix in column major ordering
324   */
325   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
326 
327   /* Compute total number of coarse nodes and setup coarse solver */
328   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
329 
330   /* free */
331   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PCBDDCResetCustomization"
337 PetscErrorCode PCBDDCResetCustomization(PC pc)
338 {
339   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
344   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
345   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
346   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
347   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
348   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
349   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
350   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
351   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
352   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
353   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PCBDDCResetTopography"
359 PetscErrorCode PCBDDCResetTopography(PC pc)
360 {
361   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
362   PetscErrorCode ierr;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
366   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
367   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
368   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
369   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
370   PetscFunctionReturn(0);
371 }
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCBDDCResetSolvers"
375 PetscErrorCode PCBDDCResetSolvers(PC pc)
376 {
377   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
378   PetscScalar    *array;
379   PetscErrorCode ierr;
380 
381   PetscFunctionBegin;
382   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
383   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
384   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
385   ierr = PetscFree(array);CHKERRQ(ierr);
386   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
387   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
388   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
389   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
390   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
391   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
392   if (pcbddc->local_auxmat2) {
393     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
394     ierr = PetscFree(array);CHKERRQ(ierr);
395   }
396   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
398   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
399   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
401   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
402   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
403   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
404   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
405   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
406   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
408   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
409   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
410   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
411   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
417 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
418 {
419   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
420   PC_IS          *pcis = (PC_IS*)pc->data;
421   VecType        impVecType;
422   PetscInt       n_constraints,n_R,old_size;
423   PetscErrorCode ierr;
424 
425   PetscFunctionBegin;
426   if (!pcbddc->ConstraintMatrix) {
427     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
428   }
429   /* get sizes */
430   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
431   n_R = pcis->n-pcbddc->n_actual_vertices;
432   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
433   /* local work vectors (try to avoid unneeded work)*/
434   /* R nodes */
435   old_size = -1;
436   if (pcbddc->vec1_R) {
437     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
438   }
439   if (n_R != old_size) {
440     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
441     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
442     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
443     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
444     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
445     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
446   }
447   /* local primal dofs */
448   old_size = -1;
449   if (pcbddc->vec1_P) {
450     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
451   }
452   if (pcbddc->local_primal_size != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
454     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
455     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
457   }
458   /* local explicit constraints */
459   old_size = -1;
460   if (pcbddc->vec1_C) {
461     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
462   }
463   if (n_constraints && n_constraints != old_size) {
464     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
465     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
466     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
467     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
468   }
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "PCBDDCSetUpCorrection"
474 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
475 {
476   PetscErrorCode         ierr;
477   /* pointers to pcis and pcbddc */
478   PC_IS*                 pcis = (PC_IS*)pc->data;
479   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
480   /* submatrices of local problem */
481   Mat                    A_RV,A_VR,A_VV;
482   /* submatrices of local coarse problem */
483   Mat                    S_VV,S_CV,S_VC,S_CC;
484   /* working matrices */
485   Mat                    C_CR;
486   /* additional working stuff */
487   PC                     pc_R;
488   Mat                    F;
489   PetscBool              isLU,isCHOL,isILU;
490 
491   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
492   PetscScalar            *work;
493   PetscInt               *idx_V_B;
494   PetscInt               n,n_vertices,n_constraints;
495   PetscInt               i,n_R,n_D,n_B;
496   PetscBool              unsymmetric_check;
497   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
498   MatType                impMatType;
499   /* some shortcuts to scalars */
500   PetscScalar            one=1.0,m_one=-1.0;
501 
502   PetscFunctionBegin;
503   /* get number of vertices (corners plus constraints with change of basis)
504      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
505   n_vertices = pcbddc->n_actual_vertices;
506   n_constraints = pcbddc->local_primal_size-n_vertices;
507   /* Set Non-overlapping dimensions */
508   n_B = pcis->n_B; n_D = pcis->n - n_B;
509   n_R = pcis->n-n_vertices;
510 
511   /* Set types for local objects needed by BDDC precondtioner */
512   impMatType = MATSEQDENSE;
513 
514   /* vertices in boundary numbering */
515   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
516   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
517   if (i != n_vertices) {
518     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
519   }
520 
521   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
522   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
523   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
524   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
525   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
526   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
527   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
528   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
529   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
530   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
531 
532   unsymmetric_check = PETSC_FALSE;
533   /* allocate workspace */
534   n = 0;
535   if (n_constraints) {
536     n += n_R*n_constraints;
537   }
538   if (n_vertices) {
539     n = PetscMax(2*n_R*n_vertices,n);
540   }
541   if (!pcbddc->issym) {
542     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
543     unsymmetric_check = PETSC_TRUE;
544   }
545   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
546 
547   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
548   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
549   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
550   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
551   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
552   if (isLU || isILU || isCHOL) {
553     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
554   } else {
555     F = NULL;
556   }
557 
558   /* Precompute stuffs needed for preprocessing and application of BDDC*/
559   if (n_constraints) {
560     Mat M1,M2,M3;
561     IS  is_aux;
562     /* see if we can save some allocations */
563     if (pcbddc->local_auxmat2) {
564       PetscInt on_R,on_constraints;
565       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
566       if (on_R != n_R || on_constraints != n_constraints) {
567         PetscScalar *marray;
568 
569         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
570         ierr = PetscFree(marray);CHKERRQ(ierr);
571         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
572         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
573       }
574     }
575     /* auxiliary matrices */
576     if (!pcbddc->local_auxmat2) {
577       PetscScalar *marray;
578 
579       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
580       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
581       marray += n_R*n_constraints;
582       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
583     }
584 
585     /* Extract constraints on R nodes: C_{CR}  */
586     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
587     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
588     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
589 
590     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
591     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
592     for (i=0;i<n_constraints;i++) {
593       const PetscScalar *row_cmat_values;
594       const PetscInt    *row_cmat_indices;
595       PetscInt          size_of_constraint,j;
596 
597       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
598       for (j=0;j<size_of_constraint;j++) {
599         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
600       }
601       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
602     }
603     if (F) {
604       Mat B;
605 
606       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
607       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
608       ierr = MatDestroy(&B);CHKERRQ(ierr);
609     } else {
610       PetscScalar *xarray;
611       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
612       for (i=0;i<n_constraints;i++) {
613         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
614         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
615         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
616         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
617         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
618       }
619       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
620     }
621 
622     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
623     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
625     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
626     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
627     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
628     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
629     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
630     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
631     ierr = MatDestroy(&M2);CHKERRQ(ierr);
632     ierr = MatDestroy(&M3);CHKERRQ(ierr);
633     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
634     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
635     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
636     ierr = MatDestroy(&M1);CHKERRQ(ierr);
637   }
638 
639   /* Get submatrices from subdomain matrix */
640   if (n_vertices) {
641     Mat       newmat;
642     IS        is_aux;
643     PetscInt  ibs,mbs;
644     PetscBool issbaij;
645 
646     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
647     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
648     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
649     if (ibs != mbs) { /* need to convert to SEQAIJ */
650       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
651       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
652       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
653       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
654       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
655     } else {
656       /* this is safe */
657       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
658       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
659       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
660         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
661         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
662         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
663         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
664       } else {
665         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
666         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
667       }
668     }
669     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
670   }
671 
672   /* Matrix of coarse basis functions (local) */
673   if (pcbddc->coarse_phi_B) {
674     PetscInt on_B,on_primal,on_D=n_D;
675     if (pcbddc->coarse_phi_D) {
676       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
677     }
678     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
679     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
680       PetscScalar *marray;
681 
682       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
683       ierr = PetscFree(marray);CHKERRQ(ierr);
684       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
685       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
686       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
687       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
688     }
689   }
690 
691   if (!pcbddc->coarse_phi_B) {
692     PetscScalar *marray;
693 
694     n = n_B*pcbddc->local_primal_size;
695     if (pcbddc->switch_static || pcbddc->dbg_flag) {
696       n += n_D*pcbddc->local_primal_size;
697     }
698     if (!pcbddc->issym) {
699       n *= 2;
700     }
701     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
702     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
703     n = n_B*pcbddc->local_primal_size;
704     if (pcbddc->switch_static || pcbddc->dbg_flag) {
705       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
706       n += n_D*pcbddc->local_primal_size;
707     }
708     if (!pcbddc->issym) {
709       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
710       if (pcbddc->switch_static) {
711         n = n_B*pcbddc->local_primal_size;
712         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
713       }
714     } else {
715       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
716       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
717       if (pcbddc->switch_static) {
718         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
719         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
720       }
721     }
722   }
723   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
724   /* vertices */
725   if (n_vertices) {
726     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
727     PetscScalar  *x,*y;
728     PetscBLASInt B_N,B_one = 1;
729 
730     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
731     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
732     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
733     if (F) {
734       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
735     } else {
736       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
737       for (i=0;i<n_vertices;i++) {
738         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
739         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
740         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
741         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
742         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
743       }
744       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
745     }
746     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
747     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
748     if (n_constraints) {
749       Mat B;
750       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
751       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
752       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
753       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
754       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
755       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
756       ierr = MatDestroy(&B);CHKERRQ(ierr);
757     }
758     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
759     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
760     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
761     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
762     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
763     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
764     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
765     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
766     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
767     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
768     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
769     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
770     /* coarse basis functions */
771     for (i=0;i<n_vertices;i++) {
772       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
773       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
774       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
775       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
776       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
777       y[n_B*i+idx_V_B[i]] = 1.0;
778       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
779       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
780 
781       if (pcbddc->switch_static || pcbddc->dbg_flag) {
782         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
783         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
784         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
785         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
786         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
787         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
788       }
789       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
790     }
791     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
792     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
793   }
794 
795   if (n_constraints) {
796     Mat B;
797 
798     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
799     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
800     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
801     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
802     if (n_vertices) {
803       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
804     }
805     ierr = MatDestroy(&B);CHKERRQ(ierr);
806     /* coarse basis functions */
807     for (i=0;i<n_constraints;i++) {
808       PetscScalar *y;
809 
810       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
811       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
812       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
813       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
814       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
816       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
817 
818       if (pcbddc->switch_static || pcbddc->dbg_flag) {
819         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
820         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
821         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
824         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
825       }
826       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
827     }
828   }
829 
830   /* compute other basis functions for non-symmetric problems */
831   if (!pcbddc->issym) {
832     Mat B,X;
833 
834     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
835 
836     if (n_constraints) {
837       Mat S_VCT,S_CCT,B_C;
838 
839       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
840       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
841       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
842       if (n_vertices) {
843         Mat B_V;
844 
845         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
846         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
847         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
848         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
849       }
850       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
851     }
852     if (n_vertices) {
853       Mat          A_VRT;
854       PetscBLASInt B_N,B_one = 1;
855 
856       if (!n_constraints) { /* if there are no constraints, reset work */
857         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
858       }
859       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
860       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
861       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
862       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
863     }
864 
865     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
866     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
867       for (i=0;i<pcbddc->local_primal_size;i++) {
868         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
869         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
870         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
871         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
872         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
873       }
874     } else {
875       for (i=0;i<pcbddc->local_primal_size;i++) {
876         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
877         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
878         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
879         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
880         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
881       }
882     }
883     ierr = MatDestroy(&B);CHKERRQ(ierr);
884     /* coarse basis functions */
885     for (i=0;i<pcbddc->local_primal_size;i++) {
886       PetscScalar *y;
887 
888       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
889       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
890       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
891       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893       if (i<n_vertices) {
894         y[n_B*i+idx_V_B[i]] = 1.0;
895       }
896       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
897       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
898 
899       if (pcbddc->switch_static || pcbddc->dbg_flag) {
900         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
902         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
905         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
906       }
907       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
908     }
909     ierr = MatDestroy(&X);CHKERRQ(ierr);
910   }
911 
912   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
913   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
914   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
915   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
916   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
917   /* Checking coarse_sub_mat and coarse basis functios */
918   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
919   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
920   if (pcbddc->dbg_flag) {
921     Mat         coarse_sub_mat;
922     Mat         AUXMAT,TM1,TM2,TM3,TM4;
923     Mat         coarse_phi_D,coarse_phi_B;
924     Mat         coarse_psi_D,coarse_psi_B;
925     Mat         A_II,A_BB,A_IB,A_BI;
926     MatType     checkmattype=MATSEQAIJ;
927     PetscReal   real_value;
928 
929     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
930     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
931     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
932     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
933     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
934     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
935     if (unsymmetric_check) {
936       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
937       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
938     }
939     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
940 
941     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944     if (unsymmetric_check) {
945       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
946       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
947       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
948       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
949       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
950       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
951       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
952       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
953       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
954       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
955       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
956       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
957     } else {
958       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
959       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
960       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
961       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
962       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
963       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
964       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
965       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
966     }
967     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
968     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
969     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
970     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
971     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
972     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
973 
974     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
975     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
976     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
977     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
978     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
979     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
980     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
981     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
982     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
983     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
984     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
985     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
986     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
987     if (unsymmetric_check) {
988       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
989       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
990     }
991     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
992   }
993 
994   /* free memory */
995   ierr = PetscFree(work);CHKERRQ(ierr);
996   if (n_vertices) {
997     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
998   }
999   if (n_constraints) {
1000     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1001   }
1002   /* get back data */
1003   *coarse_submat_vals_n = coarse_submat_vals;
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1009 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1010 {
1011   Mat            *work_mat;
1012   IS             isrow_s,iscol_s;
1013   PetscBool      rsorted,csorted;
1014   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1015   PetscErrorCode ierr;
1016 
1017   PetscFunctionBegin;
1018   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1019   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1020   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1021   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1022 
1023   if (!rsorted) {
1024     const PetscInt *idxs;
1025     PetscInt *idxs_sorted,i;
1026 
1027     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1028     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1029     for (i=0;i<rsize;i++) {
1030       idxs_perm_r[i] = i;
1031     }
1032     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1033     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1034     for (i=0;i<rsize;i++) {
1035       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1036     }
1037     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1038     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1039   } else {
1040     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1041     isrow_s = isrow;
1042   }
1043 
1044   if (!csorted) {
1045     if (isrow == iscol) {
1046       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1047       iscol_s = isrow_s;
1048     } else {
1049       const PetscInt *idxs;
1050       PetscInt *idxs_sorted,i;
1051 
1052       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1053       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1054       for (i=0;i<csize;i++) {
1055         idxs_perm_c[i] = i;
1056       }
1057       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1058       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1059       for (i=0;i<csize;i++) {
1060         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1061       }
1062       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1063       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1064     }
1065   } else {
1066     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1067     iscol_s = iscol;
1068   }
1069 
1070   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1071 
1072   if (!rsorted || !csorted) {
1073     Mat      new_mat;
1074     IS       is_perm_r,is_perm_c;
1075 
1076     if (!rsorted) {
1077       PetscInt *idxs_r,i;
1078       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1079       for (i=0;i<rsize;i++) {
1080         idxs_r[idxs_perm_r[i]] = i;
1081       }
1082       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1083       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1084     } else {
1085       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1086     }
1087     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1088 
1089     if (!csorted) {
1090       if (isrow_s == iscol_s) {
1091         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1092         is_perm_c = is_perm_r;
1093       } else {
1094         PetscInt *idxs_c,i;
1095         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1096         for (i=0;i<csize;i++) {
1097           idxs_c[idxs_perm_c[i]] = i;
1098         }
1099         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1100         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1101       }
1102     } else {
1103       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1104     }
1105     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1106 
1107     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1108     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1109     work_mat[0] = new_mat;
1110     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1111     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1112   }
1113 
1114   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1115   *B = work_mat[0];
1116   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1117   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1118   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1124 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1125 {
1126   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1127   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1128   Mat            new_mat;
1129   IS             is_local,is_global;
1130   PetscInt       local_size;
1131   PetscBool      isseqaij;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1136   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1137   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1138   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1139   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1140   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1142 
1143   /* check */
1144   if (pcbddc->dbg_flag) {
1145     Vec       x,x_change;
1146     PetscReal error;
1147 
1148     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1149     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1150     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1151     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1153     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1154     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1157     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1158     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1159     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1160     ierr = VecDestroy(&x);CHKERRQ(ierr);
1161     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1162   }
1163 
1164   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1165   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1166   if (isseqaij) {
1167     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1168   } else {
1169     Mat work_mat;
1170     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1171     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1173   }
1174   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1175   /*
1176   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1177   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1178   */
1179   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1185 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1186 {
1187   PC_IS*         pcis = (PC_IS*)(pc->data);
1188   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1189   IS             is_aux1,is_aux2;
1190   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1191   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1192   PetscInt       vbs,bs;
1193   PetscBT        bitmask;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   /*
1198     No need to setup local scatters if
1199       - primal space is unchanged
1200         AND
1201       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1202         AND
1203       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1204   */
1205   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1206     PetscFunctionReturn(0);
1207   }
1208   /* destroy old objects */
1209   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1210   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1212   /* Set Non-overlapping dimensions */
1213   n_B = pcis->n_B; n_D = pcis->n - n_B;
1214   n_vertices = pcbddc->n_actual_vertices;
1215   /* create auxiliary bitmask */
1216   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1217   for (i=0;i<n_vertices;i++) {
1218     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1219   }
1220 
1221   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1222   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1223   for (i=0, n_R=0; i<pcis->n; i++) {
1224     if (!PetscBTLookup(bitmask,i)) {
1225       idx_R_local[n_R] = i;
1226       n_R++;
1227     }
1228   }
1229 
1230   /* Block code */
1231   vbs = 1;
1232   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1233   if (bs>1 && !(n_vertices%bs)) {
1234     PetscBool is_blocked = PETSC_TRUE;
1235     PetscInt  *vary;
1236     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1237     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1238     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1239     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1240     for (i=0; i<n_vertices; i++) {
1241       if (vary[i]!=0 && vary[i]!=bs) {
1242         is_blocked = PETSC_FALSE;
1243         break;
1244       }
1245     }
1246     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1247       vbs = bs;
1248       for (i=0;i<n_R/vbs;i++) {
1249         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1250       }
1251     }
1252     ierr = PetscFree(vary);CHKERRQ(ierr);
1253   }
1254   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1255   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1256 
1257   /* print some info if requested */
1258   if (pcbddc->dbg_flag) {
1259     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1260     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1261     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1262     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1266     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1267   }
1268 
1269   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1270   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1271   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1272   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1273   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1274   for (i=0; i<n_D; i++) {
1275     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1276   }
1277   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1278   for (i=0, j=0; i<n_R; i++) {
1279     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1280       aux_array1[j++] = i;
1281     }
1282   }
1283   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1284   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1285   for (i=0, j=0; i<n_B; i++) {
1286     if (!PetscBTLookup(bitmask,is_indices[i])) {
1287       aux_array2[j++] = i;
1288     }
1289   }
1290   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1291   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1292   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1293   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1294   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1295 
1296   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1297     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1298     for (i=0, j=0; i<n_R; i++) {
1299       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1300         aux_array1[j++] = i;
1301       }
1302     }
1303     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1304     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1305     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1306   }
1307   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1308   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1315 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1316 {
1317   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1318   PC_IS          *pcis = (PC_IS*)pc->data;
1319   PC             pc_temp;
1320   Mat            A_RR;
1321   MatReuse       reuse;
1322   PetscScalar    m_one = -1.0;
1323   PetscReal      value;
1324   PetscInt       n_D,n_R,ibs,mbs;
1325   PetscBool      use_exact,use_exact_reduced,issbaij;
1326   PetscErrorCode ierr;
1327   /* prefixes stuff */
1328   char           dir_prefix[256],neu_prefix[256],str_level[16];
1329   size_t         len;
1330 
1331   PetscFunctionBegin;
1332 
1333   /* compute prefixes */
1334   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1335   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1336   if (!pcbddc->current_level) {
1337     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1338     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1339     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1340     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1343     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1344     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1345     len -= 15; /* remove "pc_bddc_coarse_" */
1346     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1347     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1348     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1349     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1350     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1351     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1352     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1353     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1354   }
1355 
1356   /* DIRICHLET PROBLEM */
1357   if (dirichlet) {
1358     /* Matrix for Dirichlet problem is pcis->A_II */
1359     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1360     if (!pcbddc->ksp_D) { /* create object if not yet build */
1361       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1362       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1363       /* default */
1364       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1365       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1366       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1367       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1368       if (issbaij) {
1369         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1370       } else {
1371         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1372       }
1373       /* Allow user's customization */
1374       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1375       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1376     }
1377     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1378     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1379     if (!n_D) {
1380       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1381       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1382     }
1383     /* Set Up KSP for Dirichlet problem of BDDC */
1384     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1385     /* set ksp_D into pcis data */
1386     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1387     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1388     pcis->ksp_D = pcbddc->ksp_D;
1389   }
1390 
1391   /* NEUMANN PROBLEM */
1392   A_RR = 0;
1393   if (neumann) {
1394     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1395     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1396     if (pcbddc->ksp_R) { /* already created ksp */
1397       PetscInt nn_R;
1398       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1399       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1400       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1401       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1402         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1403         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1404         reuse = MAT_INITIAL_MATRIX;
1405       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1406         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1407           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1408           reuse = MAT_INITIAL_MATRIX;
1409         } else { /* safe to reuse the matrix */
1410           reuse = MAT_REUSE_MATRIX;
1411         }
1412       }
1413       /* last check */
1414       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1415         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1416         reuse = MAT_INITIAL_MATRIX;
1417       }
1418     } else { /* first time, so we need to create the matrix */
1419       reuse = MAT_INITIAL_MATRIX;
1420     }
1421     /* extract A_RR */
1422     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1423     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1424     if (ibs != mbs) {
1425       Mat newmat;
1426       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1427       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1428       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1429     } else {
1430       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1431     }
1432     if (!pcbddc->ksp_R) { /* create object if not present */
1433       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1434       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1435       /* default */
1436       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1437       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1439       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1440       if (issbaij) {
1441         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1442       } else {
1443         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1444       }
1445       /* Allow user's customization */
1446       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1447       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1448     }
1449     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1450     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1451     if (!n_R) {
1452       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1453       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1454     }
1455     /* Set Up KSP for Neumann problem of BDDC */
1456     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1457   }
1458 
1459   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1460   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1461     if (pcbddc->dbg_flag) {
1462       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1465     }
1466     if (dirichlet) { /* Dirichlet */
1467       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1468       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1469       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1470       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1471       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1472       /* need to be adapted? */
1473       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1474       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1475       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1476       /* print info */
1477       if (pcbddc->dbg_flag) {
1478         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1479         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1480       }
1481       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1482         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1483       }
1484     }
1485     if (neumann) { /* Neumann */
1486       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1487       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1488       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1489       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1490       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1491       /* need to be adapted? */
1492       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1493       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1494       /* print info */
1495       if (pcbddc->dbg_flag) {
1496         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1497         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1498       }
1499       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1500         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1501       }
1502     }
1503   }
1504   /* free Neumann problem's matrix */
1505   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1511 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1512 {
1513   PetscErrorCode ierr;
1514   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1515 
1516   PetscFunctionBegin;
1517   if (applytranspose) {
1518     if (pcbddc->local_auxmat1) {
1519       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1520       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1521     }
1522     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1523   } else {
1524     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1525     if (pcbddc->local_auxmat1) {
1526       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1527       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1528     }
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1534 #undef __FUNCT__
1535 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1536 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1537 {
1538   PetscErrorCode ierr;
1539   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1540   PC_IS*            pcis = (PC_IS*)  (pc->data);
1541   const PetscScalar zero = 0.0;
1542 
1543   PetscFunctionBegin;
1544   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1545   if (applytranspose) {
1546     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1547     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1548   } else {
1549     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1550     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1551   }
1552   /* start communications from local primal nodes to rhs of coarse solver */
1553   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1554   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1558   /* TODO remove null space when doing multilevel */
1559   if (pcbddc->coarse_ksp) {
1560     if (applytranspose) {
1561       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1562     } else {
1563       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1564     }
1565   }
1566 
1567   /* Local solution on R nodes */
1568   if (pcis->n) {
1569     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1570     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1571     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1572     if (pcbddc->switch_static) {
1573       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1575     }
1576     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1577     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1578     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580     if (pcbddc->switch_static) {
1581       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583     }
1584   }
1585 
1586   /* communications from coarse sol to local primal nodes */
1587   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1588   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1589 
1590   /* Sum contributions from two levels */
1591   if (applytranspose) {
1592     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1593     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1594   } else {
1595     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1602 #undef __FUNCT__
1603 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1604 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1605 {
1606   PetscErrorCode ierr;
1607   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1608   PetscScalar    *array,*array2;
1609   Vec            from,to;
1610 
1611   PetscFunctionBegin;
1612   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1613     from = pcbddc->coarse_vec;
1614     to = pcbddc->vec1_P;
1615     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1616       Vec tvec;
1617       PetscInt lsize;
1618       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1619       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1620       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1621       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1622       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1623       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1624       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1625     }
1626   } else { /* from local to global -> put data in coarse right hand side */
1627     from = pcbddc->vec1_P;
1628     to = pcbddc->coarse_vec;
1629   }
1630   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1636 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1637 {
1638   PetscErrorCode ierr;
1639   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1640   PetscScalar    *array,*array2;
1641   Vec            from,to;
1642 
1643   PetscFunctionBegin;
1644   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1645     from = pcbddc->coarse_vec;
1646     to = pcbddc->vec1_P;
1647   } else { /* from local to global -> put data in coarse right hand side */
1648     from = pcbddc->vec1_P;
1649     to = pcbddc->coarse_vec;
1650   }
1651   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1652   if (smode == SCATTER_FORWARD) {
1653     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1654       Vec tvec;
1655       PetscInt lsize;
1656       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1657       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1658       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1659       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1660       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1661       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1662       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1663     }
1664   }
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 /* uncomment for testing purposes */
1669 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1670 #undef __FUNCT__
1671 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1672 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1673 {
1674   PetscErrorCode    ierr;
1675   PC_IS*            pcis = (PC_IS*)(pc->data);
1676   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1677   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1678   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1679   MatType           impMatType=MATSEQAIJ;
1680   /* one and zero */
1681   PetscScalar       one=1.0,zero=0.0;
1682   /* space to store constraints and their local indices */
1683   PetscScalar       *temp_quadrature_constraint;
1684   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1685   /* iterators */
1686   PetscInt          i,j,k,total_counts,temp_start_ptr;
1687   /* stuff to store connected components stored in pcbddc->mat_graph */
1688   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1689   PetscInt          n_ISForFaces,n_ISForEdges;
1690   /* near null space stuff */
1691   MatNullSpace      nearnullsp;
1692   const Vec         *nearnullvecs;
1693   Vec               *localnearnullsp;
1694   PetscBool         nnsp_has_cnst;
1695   PetscInt          nnsp_size;
1696   PetscScalar       *array;
1697   /* BLAS integers */
1698   PetscBLASInt      lwork,lierr;
1699   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1700   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1701   /* LAPACK working arrays for SVD or POD */
1702   PetscBool         skip_lapack;
1703   PetscScalar       *work;
1704   PetscReal         *singular_vals;
1705 #if defined(PETSC_USE_COMPLEX)
1706   PetscReal         *rwork;
1707 #endif
1708 #if defined(PETSC_MISSING_LAPACK_GESVD)
1709   PetscBLASInt      Blas_one_2=1;
1710   PetscScalar       *temp_basis,*correlation_mat;
1711 #else
1712   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1713   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1714 #endif
1715   /* reuse */
1716   PetscInt          olocal_primal_size;
1717   PetscInt          *oprimal_indices_local_idxs;
1718   /* change of basis */
1719   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1720   PetscBool         boolforchange,qr_needed;
1721   PetscBT           touched,change_basis,qr_needed_idx;
1722   /* auxiliary stuff */
1723   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1724   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1725   PetscScalar       *temp_quadrature_constraint_work;
1726   /* some quantities */
1727   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1728   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1729 
1730 
1731   PetscFunctionBegin;
1732   /* Destroy Mat objects computed previously */
1733   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1734   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1735   /* TODO synch with adaptive selection */
1736   /* Get index sets for faces, edges and vertices from graph */
1737   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1738   /* free unneeded index sets */
1739   if (!pcbddc->use_vertices) {
1740     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1741   }
1742   if (!pcbddc->use_edges) {
1743     for (i=0;i<n_ISForEdges;i++) {
1744       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1745     }
1746     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1747     n_ISForEdges = 0;
1748   }
1749   if (!pcbddc->use_faces) {
1750     for (i=0;i<n_ISForFaces;i++) {
1751       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1752     }
1753     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1754     n_ISForFaces = 0;
1755   }
1756   /* HACKS (the following two blocks of code) */
1757   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1758     pcbddc->use_change_of_basis = PETSC_TRUE;
1759     if (!ISForEdges) {
1760       pcbddc->use_change_on_faces = PETSC_TRUE;
1761     }
1762   }
1763   if (pcbddc->NullSpace) {
1764     /* use_change_of_basis should be consistent among processors */
1765     PetscBool tbool[2],gbool[2];
1766     tbool [0] = pcbddc->use_change_of_basis;
1767     tbool [1] = pcbddc->use_change_on_faces;
1768     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1769     pcbddc->use_change_of_basis = gbool[0];
1770     pcbddc->use_change_on_faces = gbool[1];
1771   }
1772   /* print some info */
1773   if (pcbddc->dbg_flag) {
1774     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1775     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1776     i = 0;
1777     if (ISForVertices) {
1778       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1779     }
1780     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1781     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1782     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1783     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1784   }
1785 
1786   if (!pcbddc->adaptive_selection) {
1787     /* check if near null space is attached to global mat */
1788     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1789     if (nearnullsp) {
1790       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1791       /* remove any stored info */
1792       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1793       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1794       /* store information for BDDC solver reuse */
1795       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1796       pcbddc->onearnullspace = nearnullsp;
1797       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1798       for (i=0;i<nnsp_size;i++) {
1799         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1800       }
1801     } else { /* if near null space is not provided BDDC uses constants by default */
1802       nnsp_size = 0;
1803       nnsp_has_cnst = PETSC_TRUE;
1804     }
1805     /* get max number of constraints on a single cc */
1806     max_constraints = nnsp_size;
1807     if (nnsp_has_cnst) max_constraints++;
1808 
1809     /*
1810          Evaluate maximum storage size needed by the procedure
1811          - temp_indices will contain start index of each constraint stored as follows
1812          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1813          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1814          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1815                                                                                                                                                            */
1816     total_counts = n_ISForFaces+n_ISForEdges;
1817     total_counts *= max_constraints;
1818     n_vertices = 0;
1819     if (ISForVertices) {
1820       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1821     }
1822     total_counts += n_vertices;
1823     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1824     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1825     total_counts = 0;
1826     max_size_of_constraint = 0;
1827     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1828       if (i<n_ISForEdges) {
1829         used_IS = &ISForEdges[i];
1830       } else {
1831         used_IS = &ISForFaces[i-n_ISForEdges];
1832       }
1833       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1834       total_counts += j;
1835       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1836     }
1837     total_counts *= max_constraints;
1838     total_counts += n_vertices;
1839     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1840     /* get local part of global near null space vectors */
1841     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1842     for (k=0;k<nnsp_size;k++) {
1843       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1844       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1845       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1846     }
1847 
1848     /* whether or not to skip lapack calls */
1849     skip_lapack = PETSC_TRUE;
1850     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1851 
1852     /* allocate some auxiliary stuff */
1853     if (!skip_lapack || pcbddc->use_qr_single) {
1854       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1855     }
1856 
1857     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1858     if (!skip_lapack) {
1859       PetscScalar temp_work;
1860 
1861 #if defined(PETSC_MISSING_LAPACK_GESVD)
1862       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1863       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1864       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1865       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1866 #if defined(PETSC_USE_COMPLEX)
1867       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1868 #endif
1869       /* now we evaluate the optimal workspace using query with lwork=-1 */
1870       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1871       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1872       lwork = -1;
1873       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1874 #if !defined(PETSC_USE_COMPLEX)
1875       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1876 #else
1877       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1878 #endif
1879       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1880       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1881 #else /* on missing GESVD */
1882       /* SVD */
1883       PetscInt max_n,min_n;
1884       max_n = max_size_of_constraint;
1885       min_n = max_constraints;
1886       if (max_size_of_constraint < max_constraints) {
1887         min_n = max_size_of_constraint;
1888         max_n = max_constraints;
1889       }
1890       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1891 #if defined(PETSC_USE_COMPLEX)
1892       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1893 #endif
1894       /* now we evaluate the optimal workspace using query with lwork=-1 */
1895       lwork = -1;
1896       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1897       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1898       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1899       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1900 #if !defined(PETSC_USE_COMPLEX)
1901       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1902 #else
1903       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1904 #endif
1905       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1906       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1907 #endif /* on missing GESVD */
1908       /* Allocate optimal workspace */
1909       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1910       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1911     }
1912     /* Now we can loop on constraining sets */
1913     total_counts = 0;
1914     temp_indices[0] = 0;
1915     /* vertices */
1916     if (ISForVertices) {
1917       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1918       if (nnsp_has_cnst) { /* consider all vertices */
1919         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1920         for (i=0;i<n_vertices;i++) {
1921           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1922           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1923           total_counts++;
1924         }
1925       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1926         PetscBool used_vertex;
1927         for (i=0;i<n_vertices;i++) {
1928           used_vertex = PETSC_FALSE;
1929           k = 0;
1930           while (!used_vertex && k<nnsp_size) {
1931             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1932             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1933               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1934               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1935               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1936               total_counts++;
1937               used_vertex = PETSC_TRUE;
1938             }
1939             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1940             k++;
1941           }
1942         }
1943       }
1944       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1945       n_vertices = total_counts;
1946     }
1947 
1948     /* edges and faces */
1949     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1950       if (ncc<n_ISForEdges) {
1951         used_IS = &ISForEdges[ncc];
1952         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1953       } else {
1954         used_IS = &ISForFaces[ncc-n_ISForEdges];
1955         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1956       }
1957       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1958       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1959       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1960       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1961       /* change of basis should not be performed on local periodic nodes */
1962       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1963       if (nnsp_has_cnst) {
1964         PetscScalar quad_value;
1965         temp_constraints++;
1966         if (!pcbddc->use_nnsp_true) {
1967           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1968         } else {
1969           quad_value = 1.0;
1970         }
1971         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1972         for (j=0;j<size_of_constraint;j++) {
1973           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1974         }
1975         /* sort by global ordering if using lapack subroutines (not needed!) */
1976         if (!skip_lapack || pcbddc->use_qr_single) {
1977           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1978           for (j=0;j<size_of_constraint;j++) {
1979             permutation[j]=j;
1980           }
1981           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1982           for (j=0;j<size_of_constraint;j++) {
1983             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1984           }
1985           for (j=0;j<size_of_constraint;j++) {
1986             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1987             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1988           }
1989           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1990           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1991         }
1992         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1993         total_counts++;
1994       }
1995       for (k=0;k<nnsp_size;k++) {
1996         PetscReal real_value;
1997         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1998         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1999         for (j=0;j<size_of_constraint;j++) {
2000           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2001         }
2002         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2003         /* check if array is null on the connected component */
2004         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2005         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2006         if (real_value > 0.0) { /* keep indices and values */
2007           /* sort by global ordering if using lapack subroutines */
2008           if (!skip_lapack || pcbddc->use_qr_single) {
2009             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2010             for (j=0;j<size_of_constraint;j++) {
2011               permutation[j]=j;
2012             }
2013             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2014             for (j=0;j<size_of_constraint;j++) {
2015               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2016               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2017             }
2018             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2019             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2020           }
2021           temp_constraints++;
2022           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2023           total_counts++;
2024         }
2025       }
2026       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2027       valid_constraints = temp_constraints;
2028       if (!pcbddc->use_nnsp_true && temp_constraints) {
2029         if (temp_constraints == 1) { /* just normalize the constraint */
2030           PetscScalar norm;
2031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2032           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2033           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2034           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2035         } else { /* perform SVD */
2036           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2037 
2038 #if defined(PETSC_MISSING_LAPACK_GESVD)
2039           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2040              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2041              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2042                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2043                 from that computed using LAPACKgesvd
2044              -> This is due to a different computation of eigenvectors in LAPACKheev
2045              -> The quality of the POD-computed basis will be the same */
2046           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2047           /* Store upper triangular part of correlation matrix */
2048           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2049           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2050           for (j=0;j<temp_constraints;j++) {
2051             for (k=0;k<j+1;k++) {
2052               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2053             }
2054           }
2055           /* compute eigenvalues and eigenvectors of correlation matrix */
2056           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2057           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2058 #if !defined(PETSC_USE_COMPLEX)
2059           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2060 #else
2061           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2062 #endif
2063           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2064           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2065           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2066           j = 0;
2067           while (j < temp_constraints && singular_vals[j] < tol) j++;
2068           total_counts = total_counts-j;
2069           valid_constraints = temp_constraints-j;
2070           /* scale and copy POD basis into used quadrature memory */
2071           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2072           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2073           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2075           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2077           if (j<temp_constraints) {
2078             PetscInt ii;
2079             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2080             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2081             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2082             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2083             for (k=0;k<temp_constraints-j;k++) {
2084               for (ii=0;ii<size_of_constraint;ii++) {
2085                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2086               }
2087             }
2088           }
2089 #else  /* on missing GESVD */
2090           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2091           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2093           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094 #if !defined(PETSC_USE_COMPLEX)
2095           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2096 #else
2097           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2098 #endif
2099           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2100           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2101           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2102           k = temp_constraints;
2103           if (k > size_of_constraint) k = size_of_constraint;
2104           j = 0;
2105           while (j < k && singular_vals[k-j-1] < tol) j++;
2106           valid_constraints = k-j;
2107           total_counts = total_counts-temp_constraints+valid_constraints;
2108 #endif /* on missing GESVD */
2109         }
2110       }
2111       /* setting change_of_basis flag is safe now */
2112       if (boolforchange) {
2113         for (j=0;j<valid_constraints;j++) {
2114           PetscBTSet(change_basis,total_counts-j-1);
2115         }
2116       }
2117     }
2118     /* free workspace */
2119     if (!skip_lapack || pcbddc->use_qr_single) {
2120       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2121     }
2122     if (!skip_lapack) {
2123       ierr = PetscFree(work);CHKERRQ(ierr);
2124 #if defined(PETSC_USE_COMPLEX)
2125       ierr = PetscFree(rwork);CHKERRQ(ierr);
2126 #endif
2127       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2128 #if defined(PETSC_MISSING_LAPACK_GESVD)
2129       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2130       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2131 #endif
2132     }
2133     for (k=0;k<nnsp_size;k++) {
2134       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2135     }
2136     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2137   } else {
2138     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2139     PetscInt        cum = 0;
2140 
2141     total_counts = 0;
2142     n_vertices = 0;
2143     if (sub_schurs->is_Ej_com) {
2144       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2145     }
2146     max_constraints = 0;
2147     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2148       total_counts += pcbddc->adaptive_constraints_n[i];
2149       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2150     }
2151     temp_indices = pcbddc->adaptive_constraints_ptrs;
2152     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2153     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2154 
2155 #if 0
2156     printf("Found %d totals\n",total_counts);
2157     for (i=0;i<total_counts;i++) {
2158       printf("const %d, start %d",i,temp_indices[i]);
2159       printf(" end %d:\n",temp_indices[i+1]);
2160       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2161         printf("  idxs %d",temp_indices_to_constraint[j]);
2162         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2163       }
2164     }
2165 #endif
2166 
2167     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2168     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2169     /* Change of basis */
2170     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2171     if (pcbddc->use_change_of_basis) {
2172       cum = n_vertices;
2173       for (i=0;i<sub_schurs->n_subs;i++) {
2174         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2175           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2176             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2177           }
2178         }
2179         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2180       }
2181     }
2182   }
2183 
2184   /* free index sets of faces, edges and vertices */
2185   for (i=0;i<n_ISForFaces;i++) {
2186     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2187   }
2188   if (n_ISForFaces) {
2189     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2190   }
2191   for (i=0;i<n_ISForEdges;i++) {
2192     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2193   }
2194   if (n_ISForEdges) {
2195     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2196   }
2197   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2198 
2199   /* map temp_indices_to_constraint in boundary numbering */
2200   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2201   if (i != temp_indices[total_counts]) {
2202     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2203   }
2204 
2205   /* set quantities in pcbddc data structure and store previous primal size */
2206   /* n_vertices defines the number of subdomain corners in the primal space */
2207   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2208   olocal_primal_size = pcbddc->local_primal_size;
2209   pcbddc->local_primal_size = total_counts;
2210   pcbddc->n_vertices = n_vertices;
2211   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2212 
2213   /* Create constraint matrix */
2214   /* The constraint matrix is used to compute the l2g map of primal dofs */
2215   /* so we need to set it up properly either with or without change of basis */
2216   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2217   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2218   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2219   /* array to compute a local numbering of constraints : vertices first then constraints */
2220   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2221   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2222   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2223   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2224   /* auxiliary stuff for basis change */
2225   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2226   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2227 
2228   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2229   total_primal_vertices=0;
2230   for (i=0;i<pcbddc->local_primal_size;i++) {
2231     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2232     if (size_of_constraint == 1) {
2233       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2234       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2235       aux_primal_minloc[total_primal_vertices]=0;
2236       total_primal_vertices++;
2237     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2238       PetscInt min_loc,min_index;
2239       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2240       /* find first untouched local node */
2241       k = 0;
2242       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2243       min_index = global_indices[k];
2244       min_loc = k;
2245       /* search the minimum among global nodes already untouched on the cc */
2246       for (k=1;k<size_of_constraint;k++) {
2247         /* there can be more than one constraint on a single connected component */
2248         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2249           min_index = global_indices[k];
2250           min_loc = k;
2251         }
2252       }
2253       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2254       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2255       aux_primal_minloc[total_primal_vertices]=min_loc;
2256       total_primal_vertices++;
2257     }
2258   }
2259   /* determine if a QR strategy is needed for change of basis */
2260   qr_needed = PETSC_FALSE;
2261   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2262   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2263     if (PetscBTLookup(change_basis,i)) {
2264       if (!pcbddc->use_qr_single) {
2265         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2266         j = 0;
2267         for (k=0;k<size_of_constraint;k++) {
2268           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2269             j++;
2270           }
2271         }
2272         /* found more than one primal dof on the cc */
2273         if (j > 1) {
2274           PetscBTSet(qr_needed_idx,i);
2275           qr_needed = PETSC_TRUE;
2276         }
2277       } else {
2278         PetscBTSet(qr_needed_idx,i);
2279         qr_needed = PETSC_TRUE;
2280       }
2281     }
2282   }
2283   /* free workspace */
2284   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2285 
2286   /* permute indices in order to have a sorted set of vertices */
2287   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2288 
2289   /* nonzero structure of constraint matrix */
2290   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2291   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2292   j=total_primal_vertices;
2293   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2294     if (!PetscBTLookup(change_basis,i)) {
2295       nnz[j]=temp_indices[i+1]-temp_indices[i];
2296       j++;
2297     }
2298   }
2299   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2300   ierr = PetscFree(nnz);CHKERRQ(ierr);
2301   /* set values in constraint matrix */
2302   for (i=0;i<total_primal_vertices;i++) {
2303     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2304   }
2305   total_counts = total_primal_vertices;
2306   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2307     if (!PetscBTLookup(change_basis,i)) {
2308       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2309       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2310       total_counts++;
2311     }
2312   }
2313   /* assembling */
2314   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2315   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316   /*
2317   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2318   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2319   */
2320   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2321   if (pcbddc->use_change_of_basis) {
2322     /* dual and primal dofs on a single cc */
2323     PetscInt     dual_dofs,primal_dofs;
2324     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2325     PetscInt     primal_counter;
2326     /* working stuff for GEQRF */
2327     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2328     PetscBLASInt lqr_work;
2329     /* working stuff for UNGQR */
2330     PetscScalar  *gqr_work,lgqr_work_t;
2331     PetscBLASInt lgqr_work;
2332     /* working stuff for TRTRS */
2333     PetscScalar  *trs_rhs;
2334     PetscBLASInt Blas_NRHS;
2335     /* pointers for values insertion into change of basis matrix */
2336     PetscInt     *start_rows,*start_cols;
2337     PetscScalar  *start_vals;
2338     /* working stuff for values insertion */
2339     PetscBT      is_primal;
2340     /* matrix sizes */
2341     PetscInt     global_size,local_size;
2342     /* temporary change of basis */
2343     Mat          localChangeOfBasisMatrix;
2344 
2345     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2346     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2347     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2348     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2349 
2350     /* nonzeros for local mat */
2351     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2352     for (i=0;i<pcis->n;i++) nnz[i]=1;
2353     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2354       if (PetscBTLookup(change_basis,i)) {
2355         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2356         if (PetscBTLookup(qr_needed_idx,i)) {
2357           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2358         } else {
2359           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2360           /* get local primal index on the cc */
2361           j = 0;
2362           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2363           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2364         }
2365       }
2366     }
2367     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2368     ierr = PetscFree(nnz);CHKERRQ(ierr);
2369     /* Set initial identity in the matrix */
2370     for (i=0;i<pcis->n;i++) {
2371       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2372     }
2373 
2374     if (pcbddc->dbg_flag) {
2375       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2376       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2377     }
2378 
2379 
2380     /* Now we loop on the constraints which need a change of basis */
2381     /*
2382        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2383        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2384 
2385        Basic blocks of change of basis matrix T computed by
2386 
2387           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2388 
2389             | 1        0   ...        0         s_1/S |
2390             | 0        1   ...        0         s_2/S |
2391             |              ...                        |
2392             | 0        ...            1     s_{n-1}/S |
2393             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2394 
2395             with S = \sum_{i=1}^n s_i^2
2396             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2397                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2398 
2399           - QR decomposition of constraints otherwise
2400     */
2401     if (qr_needed) {
2402       /* space to store Q */
2403       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2404       /* first we issue queries for optimal work */
2405       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2406       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2407       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2408       lqr_work = -1;
2409       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2410       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2411       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2412       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2413       lgqr_work = -1;
2414       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2415       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2416       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2417       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2418       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2419       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2420       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2421       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2422       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2423       /* array to store scaling factors for reflectors */
2424       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2425       /* array to store rhs and solution of triangular solver */
2426       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2427       /* allocating workspace for check */
2428       if (pcbddc->dbg_flag) {
2429         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2430       }
2431     }
2432     /* array to store whether a node is primal or not */
2433     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2434     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2435     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2436     if (i != total_primal_vertices) {
2437       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2438     }
2439     for (i=0;i<total_primal_vertices;i++) {
2440       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2441     }
2442     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2443 
2444     /* loop on constraints and see whether or not they need a change of basis and compute it */
2445     /* -> using implicit ordering contained in temp_indices data */
2446     total_counts = pcbddc->n_vertices;
2447     primal_counter = total_counts;
2448     while (total_counts<pcbddc->local_primal_size) {
2449       primal_dofs = 1;
2450       if (PetscBTLookup(change_basis,total_counts)) {
2451         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2452         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2453           primal_dofs++;
2454         }
2455         /* get constraint info */
2456         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2457         dual_dofs = size_of_constraint-primal_dofs;
2458 
2459         if (pcbddc->dbg_flag) {
2460           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2461         }
2462 
2463         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2464 
2465           /* copy quadrature constraints for change of basis check */
2466           if (pcbddc->dbg_flag) {
2467             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2468           }
2469           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2470           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2471 
2472           /* compute QR decomposition of constraints */
2473           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2474           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2475           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2476           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2477           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2478           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2479           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2480 
2481           /* explictly compute R^-T */
2482           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2483           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2484           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2485           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2486           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2487           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2488           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2489           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2490           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2491           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2492 
2493           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2494           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2495           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2496           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2497           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2498           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2499           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2500           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2501           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2502 
2503           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2504              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2505              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2506           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2507           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2508           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2509           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2510           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2511           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2512           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2513           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2514           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2515           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2516 
2517           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2518           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2519           /* insert cols for primal dofs */
2520           for (j=0;j<primal_dofs;j++) {
2521             start_vals = &qr_basis[j*size_of_constraint];
2522             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2523             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2524           }
2525           /* insert cols for dual dofs */
2526           for (j=0,k=0;j<dual_dofs;k++) {
2527             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2528               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2529               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2530               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2531               j++;
2532             }
2533           }
2534 
2535           /* check change of basis */
2536           if (pcbddc->dbg_flag) {
2537             PetscInt   ii,jj;
2538             PetscBool valid_qr=PETSC_TRUE;
2539             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2540             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2541             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2542             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2543             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2544             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2545             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2546             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2547             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2548             for (jj=0;jj<size_of_constraint;jj++) {
2549               for (ii=0;ii<primal_dofs;ii++) {
2550                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2551                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2552               }
2553             }
2554             if (!valid_qr) {
2555               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2556               for (jj=0;jj<size_of_constraint;jj++) {
2557                 for (ii=0;ii<primal_dofs;ii++) {
2558                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2559                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2560                   }
2561                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2562                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2563                   }
2564                 }
2565               }
2566             } else {
2567               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2568             }
2569           }
2570         } else { /* simple transformation block */
2571           PetscInt    row,col;
2572           PetscScalar val,norm;
2573 
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2575           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2576           for (j=0;j<size_of_constraint;j++) {
2577             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2578             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2579             if (!PetscBTLookup(is_primal,row_B)) {
2580               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2581               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2582               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2583             } else {
2584               for (k=0;k<size_of_constraint;k++) {
2585                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2586                 if (row != col) {
2587                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2588                 } else {
2589                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2590                 }
2591                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2592               }
2593             }
2594           }
2595           if (pcbddc->dbg_flag) {
2596             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2597           }
2598         }
2599         /* increment primal counter */
2600         primal_counter += primal_dofs;
2601       } else {
2602         if (pcbddc->dbg_flag) {
2603           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2604         }
2605       }
2606       /* increment constraint counter total_counts */
2607       total_counts += primal_dofs;
2608     }
2609 
2610     /* free workspace */
2611     if (qr_needed) {
2612       if (pcbddc->dbg_flag) {
2613         ierr = PetscFree(work);CHKERRQ(ierr);
2614       }
2615       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2616       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2617       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2618       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2619       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2620     }
2621     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2622     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2623     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2624 
2625     /* assembling of global change of variable */
2626     {
2627       Mat tmat;
2628       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2629       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2630       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2631       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2632       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2633       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2634       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2635       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2636       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2637       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2638       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2639       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2640       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2641       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2642       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2643       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2644     }
2645     /* check */
2646     if (pcbddc->dbg_flag) {
2647       PetscReal error;
2648       Vec       x,x_change;
2649 
2650       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2651       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2652       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2653       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2654       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2655       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2656       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2657       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2658       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2659       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2660       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2661       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2662       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2663       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2664       ierr = VecDestroy(&x);CHKERRQ(ierr);
2665       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2666     }
2667 
2668     /* adapt sub_schurs computed (if any) */
2669     if (pcbddc->use_deluxe_scaling) {
2670       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2671       if (sub_schurs->n_subs_par_g) {
2672         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2673       }
2674       if (sub_schurs->S_Ej_all) {
2675         Mat S_1,S_2,tmat;
2676         IS is_all_N;
2677 
2678         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2679         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2680         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2681         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2682         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2683         sub_schurs->S_Ej_all = S_1;
2684         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2685         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2686         sub_schurs->sum_S_Ej_all = S_2;
2687         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2688       }
2689     }
2690     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2691   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2692     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2693     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2694   }
2695 
2696   /* set up change of basis context */
2697   if (pcbddc->ChangeOfBasisMatrix) {
2698     PCBDDCChange_ctx change_ctx;
2699 
2700     if (!pcbddc->new_global_mat) {
2701       PetscInt global_size,local_size;
2702 
2703       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2704       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2705       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2706       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2707       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2708       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2709       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2710       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2711       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2712     } else {
2713       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2714       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2715       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2716     }
2717     if (!pcbddc->user_ChangeOfBasisMatrix) {
2718       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2719       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2720     } else {
2721       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2722       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2723     }
2724     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2725     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2726     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2727     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2728   }
2729 
2730   /* get indices in local ordering for vertices and constraints */
2731   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2732     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2733     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2734   }
2735   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2736   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2737   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2738   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2739   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2740   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2741   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2742   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2743   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2744   /* set quantities in PCBDDC data struct */
2745   pcbddc->n_actual_vertices = i;
2746   /* check if a new primal space has been introduced */
2747   pcbddc->new_primal_space_local = PETSC_TRUE;
2748   if (olocal_primal_size == pcbddc->local_primal_size) {
2749     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2750     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2751     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2752   }
2753   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2754   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2755 
2756   /* flush dbg viewer */
2757   if (pcbddc->dbg_flag) {
2758     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2759   }
2760 
2761   /* free workspace */
2762   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2763   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2764   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2765   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2766   if (!pcbddc->adaptive_selection) {
2767     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2768     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2769   } else {
2770     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2771                       pcbddc->adaptive_constraints_ptrs,
2772                       pcbddc->adaptive_constraints_idxs,
2773                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2774     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2775   }
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 #undef __FUNCT__
2780 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2781 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2782 {
2783   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2784   PC_IS       *pcis = (PC_IS*)pc->data;
2785   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2786   PetscInt    ierr,i,vertex_size;
2787   PetscViewer viewer=pcbddc->dbg_viewer;
2788 
2789   PetscFunctionBegin;
2790   /* Reset previously computed graph */
2791   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2792   /* Init local Graph struct */
2793   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2794 
2795   /* Check validity of the csr graph passed in by the user */
2796   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2797     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2798   }
2799 
2800   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2801   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2802     Mat       mat_adj;
2803     PetscInt  *xadj,*adjncy;
2804     PetscInt  nvtxs;
2805     PetscBool flg_row=PETSC_TRUE;
2806 
2807     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2808     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2809     if (!flg_row) {
2810       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2811     }
2812     if (pcbddc->use_local_adj) {
2813       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2814       pcbddc->computed_rowadj = PETSC_TRUE;
2815     } else { /* just compute subdomain's connected components */
2816       IS                     is_dummy;
2817       ISLocalToGlobalMapping l2gmap_dummy;
2818       PetscInt               j,sum;
2819       PetscInt               *cxadj,*cadjncy;
2820       const PetscInt         *idxs;
2821       PCBDDCGraph            graph;
2822       PetscBT                is_on_boundary;
2823 
2824       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2825       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2826       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2827       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2828       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2829       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2830       graph->xadj = xadj;
2831       graph->adjncy = adjncy;
2832       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2833       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2834 
2835       if (pcbddc->dbg_flag) {
2836         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2837         for (i=0;i<graph->ncc;i++) {
2838           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2839         }
2840         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2841       }
2842 
2843       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2844       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2845       for (i=0;i<pcis->n_B;i++) {
2846         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2847       }
2848       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2849 
2850       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2851       sum = 0;
2852       for (i=0;i<graph->ncc;i++) {
2853         PetscInt sizecc = 0;
2854         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2855           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2856             sizecc++;
2857           }
2858         }
2859         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2860           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2861             cxadj[graph->queue[j]] = sizecc;
2862           }
2863         }
2864         sum += sizecc*sizecc;
2865       }
2866       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2867       sum = 0;
2868       for (i=0;i<nvtxs;i++) {
2869         PetscInt temp = cxadj[i];
2870         cxadj[i] = sum;
2871         sum += temp;
2872       }
2873       cxadj[nvtxs] = sum;
2874       for (i=0;i<graph->ncc;i++) {
2875         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2876           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2877             PetscInt k,sizecc = 0;
2878             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2879               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2880                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2881                 sizecc++;
2882               }
2883             }
2884           }
2885         }
2886       }
2887       if (nvtxs) {
2888         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2889       } else {
2890         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2891         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2892       }
2893       graph->xadj = 0;
2894       graph->adjncy = 0;
2895       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2896       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2897     }
2898     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2899     if (!flg_row) {
2900       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2901     }
2902     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2903   }
2904 
2905   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2906   vertex_size = 1;
2907   if (pcbddc->user_provided_isfordofs) {
2908     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2909       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2910       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2911         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2912         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2913       }
2914       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2915       pcbddc->n_ISForDofs = 0;
2916       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2917     }
2918     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2919     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2920   } else {
2921     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2922       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2923       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2924       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2925         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2926       }
2927     }
2928   }
2929 
2930   /* Setup of Graph */
2931   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2932     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2933   }
2934   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2935     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2936   }
2937   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2938 
2939   /* Graph's connected components analysis */
2940   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2941 
2942   /* print some info to stdout */
2943   if (pcbddc->dbg_flag) {
2944     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2945   }
2946 
2947   /* mark topography has done */
2948   pcbddc->recompute_topography = PETSC_FALSE;
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2954 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2955 {
2956   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2957   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   n = 0;
2962   vertices = 0;
2963   if (pcbddc->ConstraintMatrix) {
2964     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2965     for (i=0;i<local_primal_size;i++) {
2966       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2967       if (size_of_constraint == 1) n++;
2968       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2969     }
2970     if (vertices_idx) {
2971       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
2972       n = 0;
2973       for (i=0;i<local_primal_size;i++) {
2974         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2975         if (size_of_constraint == 1) {
2976           vertices[n++]=row_cmat_indices[0];
2977         }
2978         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2979       }
2980     }
2981   }
2982   *n_vertices = n;
2983   if (vertices_idx) *vertices_idx = vertices;
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2989 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2990 {
2991   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2992   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2993   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2994   PetscBT        touched;
2995   PetscErrorCode ierr;
2996 
2997     /* This function assumes that the number of local constraints per connected component
2998        is not greater than the number of nodes defined for the connected component
2999        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3000   PetscFunctionBegin;
3001   n = 0;
3002   constraints_index = 0;
3003   if (pcbddc->ConstraintMatrix) {
3004     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3005     max_size_of_constraint = 0;
3006     for (i=0;i<local_primal_size;i++) {
3007       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3008       if (size_of_constraint > 1) {
3009         n++;
3010       }
3011       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3012       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3013     }
3014     if (constraints_idx) {
3015       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3016       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3017       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3018       n = 0;
3019       for (i=0;i<local_primal_size;i++) {
3020         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3021         if (size_of_constraint > 1) {
3022           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3023           /* find first untouched local node */
3024           j = 0;
3025           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3026           min_index = row_cmat_global_indices[j];
3027           min_loc = j;
3028           /* search the minimum among nodes not yet touched on the connected component
3029              since there can be more than one constraint on a single cc */
3030           for (j=1;j<size_of_constraint;j++) {
3031             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3032               min_index = row_cmat_global_indices[j];
3033               min_loc = j;
3034             }
3035           }
3036           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3037           constraints_index[n++] = row_cmat_indices[min_loc];
3038         }
3039         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3040       }
3041       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3042       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3043     }
3044   }
3045   *n_constraints = n;
3046   if (constraints_idx) *constraints_idx = constraints_index;
3047   PetscFunctionReturn(0);
3048 }
3049 
3050 #undef __FUNCT__
3051 #define __FUNCT__ "PCBDDCSubsetNumbering"
3052 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3053 {
3054   Vec            local_vec,global_vec;
3055   IS             seqis,paris;
3056   VecScatter     scatter_ctx;
3057   PetscScalar    *array;
3058   PetscInt       *temp_global_dofs;
3059   PetscScalar    globalsum;
3060   PetscInt       i,j,s;
3061   PetscInt       nlocals,first_index,old_index,max_local;
3062   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3063   PetscMPIInt    *dof_sizes,*dof_displs;
3064   PetscBool      first_found;
3065   PetscErrorCode ierr;
3066 
3067   PetscFunctionBegin;
3068   /* mpi buffers */
3069   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3070   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3071   j = ( !rank_prec_comm ? size_prec_comm : 0);
3072   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3073   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3074   /* get maximum size of subset */
3075   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3076   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3077   max_local = 0;
3078   for (i=0;i<n_local_dofs;i++) {
3079     if (max_local < temp_global_dofs[i] ) {
3080       max_local = temp_global_dofs[i];
3081     }
3082   }
3083   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3084   max_global++;
3085   max_local = 0;
3086   for (i=0;i<n_local_dofs;i++) {
3087     if (max_local < local_dofs[i] ) {
3088       max_local = local_dofs[i];
3089     }
3090   }
3091   max_local++;
3092   /* allocate workspace */
3093   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3094   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3095   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3096   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3097   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3098   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3099   /* create scatter */
3100   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3101   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3102   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3103   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3104   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3105   /* init array */
3106   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3107   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3108   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3109   if (local_dofs_mult) {
3110     for (i=0;i<n_local_dofs;i++) {
3111       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3112     }
3113   } else {
3114     for (i=0;i<n_local_dofs;i++) {
3115       array[local_dofs[i]]=1.0;
3116     }
3117   }
3118   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3119   /* scatter into global vec and get total number of global dofs */
3120   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3121   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3122   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3123   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3124   /* Fill global_vec with cumulative function for global numbering */
3125   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3126   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3127   nlocals = 0;
3128   first_index = -1;
3129   first_found = PETSC_FALSE;
3130   for (i=0;i<s;i++) {
3131     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3132       first_found = PETSC_TRUE;
3133       first_index = i;
3134     }
3135     nlocals += (PetscInt)PetscRealPart(array[i]);
3136   }
3137   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3138   if (!rank_prec_comm) {
3139     dof_displs[0]=0;
3140     for (i=1;i<size_prec_comm;i++) {
3141       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3142     }
3143   }
3144   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3145   if (first_found) {
3146     array[first_index] += (PetscScalar)nlocals;
3147     old_index = first_index;
3148     for (i=first_index+1;i<s;i++) {
3149       if (PetscRealPart(array[i]) > 0.1) {
3150         array[i] += array[old_index];
3151         old_index = i;
3152       }
3153     }
3154   }
3155   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3156   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3157   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3158   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3159   /* get global ordering of local dofs */
3160   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3161   if (local_dofs_mult) {
3162     for (i=0;i<n_local_dofs;i++) {
3163       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3164     }
3165   } else {
3166     for (i=0;i<n_local_dofs;i++) {
3167       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3168     }
3169   }
3170   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3171   /* free workspace */
3172   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3173   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3174   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3175   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3176   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3177   /* return pointer to global ordering of local dofs */
3178   *global_numbering_subset = temp_global_dofs;
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 #undef __FUNCT__
3183 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3184 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3185 {
3186   PetscInt       i,j;
3187   PetscScalar    *alphas;
3188   PetscErrorCode ierr;
3189 
3190   PetscFunctionBegin;
3191   /* this implements stabilized Gram-Schmidt */
3192   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3193   for (i=0;i<n;i++) {
3194     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3195     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3196     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3197   }
3198   ierr = PetscFree(alphas);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 #undef __FUNCT__
3203 #define __FUNCT__ "MatISGetSubassemblingPattern"
3204 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3205 {
3206   Mat             subdomain_adj;
3207   IS              new_ranks,ranks_send_to;
3208   MatPartitioning partitioner;
3209   Mat_IS          *matis;
3210   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3211   PetscInt        prank;
3212   PetscMPIInt     size,rank,color;
3213   PetscInt        *xadj,*adjncy,*oldranks;
3214   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3215   PetscInt        i,local_size,threshold=0;
3216   PetscErrorCode  ierr;
3217   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3218   PetscSubcomm    subcomm;
3219 
3220   PetscFunctionBegin;
3221   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3222   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3223   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3224 
3225   /* Get info on mapping */
3226   matis = (Mat_IS*)(mat->data);
3227   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3228   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3229 
3230   /* build local CSR graph of subdomains' connectivity */
3231   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3232   xadj[0] = 0;
3233   xadj[1] = PetscMax(n_neighs-1,0);
3234   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3235   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3236 
3237   if (threshold) {
3238     PetscInt xadj_count = 0;
3239     for (i=1;i<n_neighs;i++) {
3240       if (n_shared[i] > threshold) {
3241         adjncy[xadj_count] = neighs[i];
3242         adjncy_wgt[xadj_count] = n_shared[i];
3243         xadj_count++;
3244       }
3245     }
3246     xadj[1] = xadj_count;
3247   } else {
3248     if (xadj[1]) {
3249       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3250       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3251     }
3252   }
3253   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3254   if (use_square) {
3255     for (i=0;i<xadj[1];i++) {
3256       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3257     }
3258   }
3259   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3260 
3261   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3262 
3263   /*
3264     Restrict work on active processes only.
3265   */
3266   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3267   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3268   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3269   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3270   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3271   if (color) {
3272     ierr = PetscFree(xadj);CHKERRQ(ierr);
3273     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3274     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3275   } else {
3276     PetscInt coarsening_ratio;
3277     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3278     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3279     prank = rank;
3280     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3281     /*
3282     for (i=0;i<size;i++) {
3283       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3284     }
3285     */
3286     for (i=0;i<xadj[1];i++) {
3287       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3288     }
3289     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3290     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3291     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3292 
3293     /* Partition */
3294     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3295     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3296     if (use_vwgt) {
3297       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3298       v_wgt[0] = local_size;
3299       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3300     }
3301     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3302     coarsening_ratio = size/n_subdomains;
3303     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3304     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3305     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3306     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3307 
3308     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3309     if (contiguous) {
3310       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3311     } else {
3312       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3313     }
3314     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3315     /* clean up */
3316     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3317     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3318     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3319     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3320   }
3321   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3322 
3323   /* assemble parallel IS for sends */
3324   i = 1;
3325   if (color) i=0;
3326   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3327 
3328   /* get back IS */
3329   *is_sends = ranks_send_to;
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3334 
3335 #undef __FUNCT__
3336 #define __FUNCT__ "MatISSubassemble"
3337 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3338 {
3339   Mat                    local_mat;
3340   Mat_IS                 *matis;
3341   IS                     is_sends_internal;
3342   PetscInt               rows,cols,new_local_rows;
3343   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3344   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3345   ISLocalToGlobalMapping l2gmap;
3346   PetscInt*              l2gmap_indices;
3347   const PetscInt*        is_indices;
3348   MatType                new_local_type;
3349   /* buffers */
3350   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3351   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3352   PetscInt               *recv_buffer_idxs_local;
3353   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3354   /* MPI */
3355   MPI_Comm               comm,comm_n;
3356   PetscSubcomm           subcomm;
3357   PetscMPIInt            n_sends,n_recvs,commsize;
3358   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3359   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3360   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3361   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3362   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3363   PetscErrorCode         ierr;
3364 
3365   PetscFunctionBegin;
3366   /* TODO: add missing checks */
3367   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3368   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3369   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3370   PetscValidLogicalCollectiveInt(mat,nis,7);
3371   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3372   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3373   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3374   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3375   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3376   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3377   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3378   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3379     PetscInt mrows,mcols,mnrows,mncols;
3380     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3381     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3382     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3383     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3384     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3385     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3386   }
3387   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3388   PetscValidLogicalCollectiveInt(mat,bs,0);
3389   /* prepare IS for sending if not provided */
3390   if (!is_sends) {
3391     PetscBool pcontig = PETSC_TRUE;
3392     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3393     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3394   } else {
3395     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3396     is_sends_internal = is_sends;
3397   }
3398 
3399   /* get pointer of MATIS data */
3400   matis = (Mat_IS*)mat->data;
3401 
3402   /* get comm */
3403   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3404 
3405   /* compute number of sends */
3406   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3407   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3408 
3409   /* compute number of receives */
3410   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3411   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3412   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3413   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3414   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3415   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3416   ierr = PetscFree(iflags);CHKERRQ(ierr);
3417 
3418   /* restrict comm if requested */
3419   subcomm = 0;
3420   destroy_mat = PETSC_FALSE;
3421   if (restrict_comm) {
3422     PetscMPIInt color,rank,subcommsize;
3423     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3424     color = 0;
3425     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3426     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3427     subcommsize = commsize - subcommsize;
3428     /* check if reuse has been requested */
3429     if (reuse == MAT_REUSE_MATRIX) {
3430       if (*mat_n) {
3431         PetscMPIInt subcommsize2;
3432         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3433         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3434         comm_n = PetscObjectComm((PetscObject)*mat_n);
3435       } else {
3436         comm_n = PETSC_COMM_SELF;
3437       }
3438     } else { /* MAT_INITIAL_MATRIX */
3439       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3440       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3441       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3442       comm_n = subcomm->comm;
3443     }
3444     /* flag to destroy *mat_n if not significative */
3445     if (color) destroy_mat = PETSC_TRUE;
3446   } else {
3447     comm_n = comm;
3448   }
3449 
3450   /* prepare send/receive buffers */
3451   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3452   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3453   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3454   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3455   if (nis) {
3456     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3457   }
3458 
3459   /* Get data from local matrices */
3460   if (!isdense) {
3461     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3462     /* TODO: See below some guidelines on how to prepare the local buffers */
3463     /*
3464        send_buffer_vals should contain the raw values of the local matrix
3465        send_buffer_idxs should contain:
3466        - MatType_PRIVATE type
3467        - PetscInt        size_of_l2gmap
3468        - PetscInt        global_row_indices[size_of_l2gmap]
3469        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3470     */
3471   } else {
3472     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3473     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3474     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3475     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3476     send_buffer_idxs[1] = i;
3477     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3478     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3479     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3480     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3481     for (i=0;i<n_sends;i++) {
3482       ilengths_vals[is_indices[i]] = len*len;
3483       ilengths_idxs[is_indices[i]] = len+2;
3484     }
3485   }
3486   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3487   /* additional is (if any) */
3488   if (nis) {
3489     PetscMPIInt psum;
3490     PetscInt j;
3491     for (j=0,psum=0;j<nis;j++) {
3492       PetscInt plen;
3493       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3494       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3495       psum += len+1; /* indices + lenght */
3496     }
3497     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3498     for (j=0,psum=0;j<nis;j++) {
3499       PetscInt plen;
3500       const PetscInt *is_array_idxs;
3501       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3502       send_buffer_idxs_is[psum] = plen;
3503       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3504       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3505       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3506       psum += plen+1; /* indices + lenght */
3507     }
3508     for (i=0;i<n_sends;i++) {
3509       ilengths_idxs_is[is_indices[i]] = psum;
3510     }
3511     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3512   }
3513 
3514   buf_size_idxs = 0;
3515   buf_size_vals = 0;
3516   buf_size_idxs_is = 0;
3517   for (i=0;i<n_recvs;i++) {
3518     buf_size_idxs += (PetscInt)olengths_idxs[i];
3519     buf_size_vals += (PetscInt)olengths_vals[i];
3520     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3521   }
3522   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3523   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3524   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3525 
3526   /* get new tags for clean communications */
3527   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3528   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3529   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3530 
3531   /* allocate for requests */
3532   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3533   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3534   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3535   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3536   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3537   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3538 
3539   /* communications */
3540   ptr_idxs = recv_buffer_idxs;
3541   ptr_vals = recv_buffer_vals;
3542   ptr_idxs_is = recv_buffer_idxs_is;
3543   for (i=0;i<n_recvs;i++) {
3544     source_dest = onodes[i];
3545     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3546     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3547     ptr_idxs += olengths_idxs[i];
3548     ptr_vals += olengths_vals[i];
3549     if (nis) {
3550       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3551       ptr_idxs_is += olengths_idxs_is[i];
3552     }
3553   }
3554   for (i=0;i<n_sends;i++) {
3555     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3556     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3557     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3558     if (nis) {
3559       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3560     }
3561   }
3562   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3563   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3564 
3565   /* assemble new l2g map */
3566   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3567   ptr_idxs = recv_buffer_idxs;
3568   new_local_rows = 0;
3569   for (i=0;i<n_recvs;i++) {
3570     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3571     ptr_idxs += olengths_idxs[i];
3572   }
3573   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3574   ptr_idxs = recv_buffer_idxs;
3575   new_local_rows = 0;
3576   for (i=0;i<n_recvs;i++) {
3577     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3578     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3579     ptr_idxs += olengths_idxs[i];
3580   }
3581   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3582   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3583   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3584 
3585   /* infer new local matrix type from received local matrices type */
3586   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3587   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3588   if (n_recvs) {
3589     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3590     ptr_idxs = recv_buffer_idxs;
3591     for (i=0;i<n_recvs;i++) {
3592       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3593         new_local_type_private = MATAIJ_PRIVATE;
3594         break;
3595       }
3596       ptr_idxs += olengths_idxs[i];
3597     }
3598     switch (new_local_type_private) {
3599       case MATDENSE_PRIVATE:
3600         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3601           new_local_type = MATSEQAIJ;
3602           bs = 1;
3603         } else { /* if I receive only 1 dense matrix */
3604           new_local_type = MATSEQDENSE;
3605           bs = 1;
3606         }
3607         break;
3608       case MATAIJ_PRIVATE:
3609         new_local_type = MATSEQAIJ;
3610         bs = 1;
3611         break;
3612       case MATBAIJ_PRIVATE:
3613         new_local_type = MATSEQBAIJ;
3614         break;
3615       case MATSBAIJ_PRIVATE:
3616         new_local_type = MATSEQSBAIJ;
3617         break;
3618       default:
3619         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3620         break;
3621     }
3622   } else { /* by default, new_local_type is seqdense */
3623     new_local_type = MATSEQDENSE;
3624     bs = 1;
3625   }
3626 
3627   /* create MATIS object if needed */
3628   if (reuse == MAT_INITIAL_MATRIX) {
3629     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3630     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3631   } else {
3632     /* it also destroys the local matrices */
3633     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3634   }
3635   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3636   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3637 
3638   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3639 
3640   /* Global to local map of received indices */
3641   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3642   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3643   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3644 
3645   /* restore attributes -> type of incoming data and its size */
3646   buf_size_idxs = 0;
3647   for (i=0;i<n_recvs;i++) {
3648     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3649     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3650     buf_size_idxs += (PetscInt)olengths_idxs[i];
3651   }
3652   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3653 
3654   /* set preallocation */
3655   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3656   if (!newisdense) {
3657     PetscInt *new_local_nnz=0;
3658 
3659     ptr_vals = recv_buffer_vals;
3660     ptr_idxs = recv_buffer_idxs_local;
3661     if (n_recvs) {
3662       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3663     }
3664     for (i=0;i<n_recvs;i++) {
3665       PetscInt j;
3666       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3667         for (j=0;j<*(ptr_idxs+1);j++) {
3668           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3669         }
3670       } else {
3671         /* TODO */
3672       }
3673       ptr_idxs += olengths_idxs[i];
3674     }
3675     if (new_local_nnz) {
3676       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3677       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3678       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3679       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3680       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3681       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3682     } else {
3683       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3684     }
3685     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3686   } else {
3687     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3688   }
3689 
3690   /* set values */
3691   ptr_vals = recv_buffer_vals;
3692   ptr_idxs = recv_buffer_idxs_local;
3693   for (i=0;i<n_recvs;i++) {
3694     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3695       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3696       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3697       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3698       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3699       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3700     } else {
3701       /* TODO */
3702     }
3703     ptr_idxs += olengths_idxs[i];
3704     ptr_vals += olengths_vals[i];
3705   }
3706   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3708   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3709   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3710   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3711   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3712 
3713 #if 0
3714   if (!restrict_comm) { /* check */
3715     Vec       lvec,rvec;
3716     PetscReal infty_error;
3717 
3718     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3719     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3720     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3721     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3722     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3723     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3724     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3725     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3726     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3727   }
3728 #endif
3729 
3730   /* assemble new additional is (if any) */
3731   if (nis) {
3732     PetscInt **temp_idxs,*count_is,j,psum;
3733 
3734     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3735     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3736     ptr_idxs = recv_buffer_idxs_is;
3737     psum = 0;
3738     for (i=0;i<n_recvs;i++) {
3739       for (j=0;j<nis;j++) {
3740         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3741         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3742         psum += plen;
3743         ptr_idxs += plen+1; /* shift pointer to received data */
3744       }
3745     }
3746     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3747     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3748     for (i=1;i<nis;i++) {
3749       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3750     }
3751     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3752     ptr_idxs = recv_buffer_idxs_is;
3753     for (i=0;i<n_recvs;i++) {
3754       for (j=0;j<nis;j++) {
3755         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3756         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3757         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3758         ptr_idxs += plen+1; /* shift pointer to received data */
3759       }
3760     }
3761     for (i=0;i<nis;i++) {
3762       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3763       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3764       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3765     }
3766     ierr = PetscFree(count_is);CHKERRQ(ierr);
3767     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3768     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3769   }
3770   /* free workspace */
3771   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3772   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3773   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3774   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3775   if (isdense) {
3776     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3777     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3778   } else {
3779     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3780   }
3781   if (nis) {
3782     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3783     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3784   }
3785   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3786   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3787   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3788   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3789   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3790   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3791   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3792   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3793   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3794   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3795   ierr = PetscFree(onodes);CHKERRQ(ierr);
3796   if (nis) {
3797     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3798     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3799     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3800   }
3801   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3802   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3803     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3804     for (i=0;i<nis;i++) {
3805       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3806     }
3807   }
3808   PetscFunctionReturn(0);
3809 }
3810 
3811 /* temporary hack into ksp private data structure */
3812 #include <petsc-private/kspimpl.h>
3813 
3814 #undef __FUNCT__
3815 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3816 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3817 {
3818   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3819   PC_IS                  *pcis = (PC_IS*)pc->data;
3820   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3821   MatNullSpace           CoarseNullSpace=NULL;
3822   ISLocalToGlobalMapping coarse_islg;
3823   IS                     coarse_is,*isarray;
3824   PetscInt               i,im_active=-1,active_procs=-1;
3825   PetscInt               nis,nisdofs,nisneu;
3826   PC                     pc_temp;
3827   PCType                 coarse_pc_type;
3828   KSPType                coarse_ksp_type;
3829   PetscBool              multilevel_requested,multilevel_allowed;
3830   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3831   Mat                    t_coarse_mat_is;
3832   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3833   PetscMPIInt            all_procs;
3834   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3835   PetscBool              compute_vecs = PETSC_FALSE;
3836   PetscScalar            *array;
3837   PetscErrorCode         ierr;
3838 
3839   PetscFunctionBegin;
3840   /* Assign global numbering to coarse dofs */
3841   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3842     compute_vecs = PETSC_TRUE;
3843     PetscInt ocoarse_size;
3844     ocoarse_size = pcbddc->coarse_size;
3845     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3846     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3847     /* see if we can avoid some work */
3848     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3849       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3850         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3851         coarse_reuse = PETSC_FALSE;
3852       } else { /* we can safely reuse already computed coarse matrix */
3853         coarse_reuse = PETSC_TRUE;
3854       }
3855     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3856       coarse_reuse = PETSC_FALSE;
3857     }
3858     /* reset any subassembling information */
3859     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3860     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3861   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3862     coarse_reuse = PETSC_TRUE;
3863   }
3864 
3865   /* count "active" (i.e. with positive local size) and "void" processes */
3866   im_active = !!(pcis->n);
3867   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3868   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3869   void_procs = all_procs-active_procs;
3870   csin_type_simple = PETSC_TRUE;
3871   redist = PETSC_FALSE;
3872   if (pcbddc->current_level && void_procs) {
3873     csin_ml = PETSC_TRUE;
3874     ncoarse_ml = void_procs;
3875     csin_ds = PETSC_TRUE;
3876     ncoarse_ds = void_procs;
3877   } else {
3878     csin_ml = PETSC_FALSE;
3879     ncoarse_ml = all_procs;
3880     if (void_procs) {
3881       csin_ds = PETSC_TRUE;
3882       ncoarse_ds = void_procs;
3883       csin_type_simple = PETSC_FALSE;
3884     } else {
3885       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3886         csin_ds = PETSC_TRUE;
3887         ncoarse_ds = pcbddc->redistribute_coarse;
3888         redist = PETSC_TRUE;
3889       } else {
3890         csin_ds = PETSC_FALSE;
3891         ncoarse_ds = all_procs;
3892       }
3893     }
3894   }
3895 
3896   /*
3897     test if we can go multilevel: three conditions must be satisfied:
3898     - we have not exceeded the number of levels requested
3899     - we can actually subassemble the active processes
3900     - we can find a suitable number of MPI processes where we can place the subassembled problem
3901   */
3902   multilevel_allowed = PETSC_FALSE;
3903   multilevel_requested = PETSC_FALSE;
3904   if (pcbddc->current_level < pcbddc->max_levels) {
3905     multilevel_requested = PETSC_TRUE;
3906     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3907       multilevel_allowed = PETSC_FALSE;
3908     } else {
3909       multilevel_allowed = PETSC_TRUE;
3910     }
3911   }
3912   /* determine number of process partecipating to coarse solver */
3913   if (multilevel_allowed) {
3914     ncoarse = ncoarse_ml;
3915     csin = csin_ml;
3916   } else {
3917     ncoarse = ncoarse_ds;
3918     csin = csin_ds;
3919   }
3920 
3921   /* creates temporary l2gmap and IS for coarse indexes */
3922   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3923   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3924 
3925   /* creates temporary MATIS object for coarse matrix */
3926   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3927   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3928   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3929   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3930 #if 0
3931   {
3932     PetscViewer viewer;
3933     char filename[256];
3934     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3935     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3936     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3937     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3938     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3939   }
3940 #endif
3941   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
3942   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3943   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3944   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3945   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3946 
3947   /* compute dofs splitting and neumann boundaries for coarse dofs */
3948   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
3949     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3950     const PetscInt         *idxs;
3951     ISLocalToGlobalMapping tmap;
3952 
3953     /* create map between primal indices (in local representative ordering) and local primal numbering */
3954     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3955     /* allocate space for temporary storage */
3956     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
3957     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
3958     /* allocate for IS array */
3959     nisdofs = pcbddc->n_ISForDofsLocal;
3960     nisneu = !!pcbddc->NeumannBoundariesLocal;
3961     nis = nisdofs + nisneu;
3962     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
3963     /* dofs splitting */
3964     for (i=0;i<nisdofs;i++) {
3965       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3966       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3967       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3968       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3969       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3970       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3971       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3972       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
3973     }
3974     /* neumann boundaries */
3975     if (pcbddc->NeumannBoundariesLocal) {
3976       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3977       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3978       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3979       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3980       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3981       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3982       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
3983       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
3984     }
3985     /* free memory */
3986     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3987     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3988     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3989   } else {
3990     nis = 0;
3991     nisdofs = 0;
3992     nisneu = 0;
3993     isarray = NULL;
3994   }
3995   /* destroy no longer needed map */
3996   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3997 
3998   /* restrict on coarse candidates (if needed) */
3999   coarse_mat_is = NULL;
4000   if (csin) {
4001     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4002       if (redist) {
4003         PetscMPIInt rank;
4004         PetscInt spc,n_spc_p1,dest[1];
4005 
4006         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4007         spc = all_procs/pcbddc->redistribute_coarse;
4008         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4009         if (rank > n_spc_p1*(spc+1)-1) {
4010           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4011         } else {
4012           dest[0] = rank/(spc+1);
4013         }
4014         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4015       } else {
4016         PetscInt j,tissize,*nisindices;
4017         PetscInt *coarse_candidates;
4018         const PetscInt* tisindices;
4019         /* get coarse candidates' ranks in pc communicator */
4020         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4021         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4022         for (i=0,j=0;i<all_procs;i++) {
4023           if (!coarse_candidates[i]) {
4024             coarse_candidates[j]=i;
4025             j++;
4026           }
4027         }
4028         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4029         /* get a suitable subassembling pattern */
4030         if (csin_type_simple) {
4031           PetscMPIInt rank;
4032           PetscInt    issize,isidx;
4033           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4034           if (im_active) {
4035             issize = 1;
4036             isidx = (PetscInt)rank;
4037           } else {
4038             issize = 0;
4039             isidx = -1;
4040           }
4041           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4042         } else {
4043           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4044         }
4045         if (pcbddc->dbg_flag) {
4046           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4047           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4048           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4049           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4050           for (i=0;i<j;i++) {
4051             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4052           }
4053           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4054           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4055         }
4056         /* shift the pattern on coarse candidates */
4057         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4058         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4059         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4060         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4061         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4062         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4063         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4064       }
4065     }
4066     if (pcbddc->dbg_flag) {
4067       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4068       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4069       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4070       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4071     }
4072     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4073     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4074   } else {
4075     if (pcbddc->dbg_flag) {
4076       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4077       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4078       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4079     }
4080     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4081     coarse_mat_is = t_coarse_mat_is;
4082   }
4083 
4084   /* create local to global scatters for coarse problem */
4085   if (compute_vecs) {
4086     PetscInt lrows;
4087     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4088     if (coarse_mat_is) {
4089       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4090     } else {
4091       lrows = 0;
4092     }
4093     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4094     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4095     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4096     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4097     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4098   }
4099   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4100   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4101 
4102   /* set defaults for coarse KSP and PC */
4103   if (multilevel_allowed) {
4104     coarse_ksp_type = KSPRICHARDSON;
4105     coarse_pc_type = PCBDDC;
4106   } else {
4107     coarse_ksp_type = KSPPREONLY;
4108     coarse_pc_type = PCREDUNDANT;
4109   }
4110 
4111   /* print some info if requested */
4112   if (pcbddc->dbg_flag) {
4113     if (!multilevel_allowed) {
4114       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4115       if (multilevel_requested) {
4116         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4117       } else if (pcbddc->max_levels) {
4118         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4119       }
4120       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4121     }
4122   }
4123 
4124   /* create the coarse KSP object only once with defaults */
4125   if (coarse_mat_is) {
4126     MatReuse coarse_mat_reuse;
4127     PetscViewer dbg_viewer = NULL;
4128     if (pcbddc->dbg_flag) {
4129       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4130       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4131     }
4132     if (!pcbddc->coarse_ksp) {
4133       char prefix[256],str_level[16];
4134       size_t len;
4135       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4136       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4137       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4138       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4139       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4140       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4141       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4142       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4143       /* prefix */
4144       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4145       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4146       if (!pcbddc->current_level) {
4147         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4148         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4149       } else {
4150         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4151         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4152         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4153         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4154         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4155         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4156       }
4157       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4158       /* allow user customization */
4159       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4160       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4161     }
4162 
4163     /* get some info after set from options */
4164     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4165     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4166     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4167     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4168     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4169       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4170       isbddc = PETSC_FALSE;
4171     }
4172     if (isredundant) {
4173       KSP inner_ksp;
4174       PC inner_pc;
4175       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4176       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4177       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4178     }
4179 
4180     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4181     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4182     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4183     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4184     if (nisdofs) {
4185       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4186       for (i=0;i<nisdofs;i++) {
4187         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4188       }
4189     }
4190     if (nisneu) {
4191       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4192       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4193     }
4194 
4195     /* assemble coarse matrix */
4196     if (coarse_reuse) {
4197       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4198       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4199       coarse_mat_reuse = MAT_REUSE_MATRIX;
4200     } else {
4201       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4202     }
4203     if (isbddc || isnn) {
4204       if (pcbddc->coarsening_ratio > 1) {
4205         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4206           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4207           if (pcbddc->dbg_flag) {
4208             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4209             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4210             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4211             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4212           }
4213         }
4214         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4215       } else {
4216         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4217         coarse_mat = coarse_mat_is;
4218       }
4219     } else {
4220       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4221     }
4222     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4223 
4224     /* propagate symmetry info to coarse matrix */
4225     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4226     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4227 
4228     /* set operators */
4229     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4230     if (pcbddc->dbg_flag) {
4231       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4232     }
4233   } else { /* processes non partecipating to coarse solver (if any) */
4234     coarse_mat = 0;
4235   }
4236   ierr = PetscFree(isarray);CHKERRQ(ierr);
4237 #if 0
4238   {
4239     PetscViewer viewer;
4240     char filename[256];
4241     sprintf(filename,"coarse_mat.m");
4242     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4243     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4244     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4245     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4246   }
4247 #endif
4248 
4249   /* Compute coarse null space (special handling by BDDC only) */
4250   if (pcbddc->NullSpace) {
4251     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4252   }
4253 
4254   if (pcbddc->coarse_ksp) {
4255     Vec crhs,csol;
4256     PetscBool ispreonly;
4257     if (CoarseNullSpace) {
4258       if (isbddc) {
4259         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4260       } else {
4261         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4262       }
4263     }
4264     /* setup coarse ksp */
4265     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4266     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4267     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4268     /* hack */
4269     if (!csol) {
4270       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4271     }
4272     if (!crhs) {
4273       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4274     }
4275     /* Check coarse problem if in debug mode or if solving with an iterative method */
4276     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4277     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4278       KSP       check_ksp;
4279       KSPType   check_ksp_type;
4280       PC        check_pc;
4281       Vec       check_vec,coarse_vec;
4282       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4283       PetscInt  its;
4284       PetscBool compute_eigs;
4285       PetscReal *eigs_r,*eigs_c;
4286       PetscInt  neigs;
4287       const char *prefix;
4288 
4289       /* Create ksp object suitable for estimation of extreme eigenvalues */
4290       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4291       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4292       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4293       if (ispreonly) {
4294         check_ksp_type = KSPPREONLY;
4295         compute_eigs = PETSC_FALSE;
4296       } else {
4297         check_ksp_type = KSPGMRES;
4298         compute_eigs = PETSC_TRUE;
4299       }
4300       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4301       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4302       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4303       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4304       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4305       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4306       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4307       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4308       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4309       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4310       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4311       /* create random vec */
4312       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4313       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4314       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4315       if (CoarseNullSpace) {
4316         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4317       }
4318       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4319       /* solve coarse problem */
4320       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4321       if (CoarseNullSpace) {
4322         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4323       }
4324       /* set eigenvalue estimation if preonly has not been requested */
4325       if (compute_eigs) {
4326         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4327         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4328         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4329         lambda_max = eigs_r[neigs-1];
4330         lambda_min = eigs_r[0];
4331         if (pcbddc->use_coarse_estimates) {
4332           if (lambda_max>lambda_min) {
4333             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4334             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4335           }
4336         }
4337       }
4338 
4339       /* check coarse problem residual error */
4340       if (pcbddc->dbg_flag) {
4341         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4342         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4343         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4344         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4345         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4346         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4347         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4348         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4349         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4350         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4351         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4352         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4353         if (compute_eigs) {
4354           PetscReal lambda_max_s,lambda_min_s;
4355           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4356           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4357           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4358           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4359           for (i=0;i<neigs;i++) {
4360             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4361           }
4362         }
4363         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4364         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4365       }
4366       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4367       if (compute_eigs) {
4368         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4369         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4370       }
4371     }
4372   }
4373   /* print additional info */
4374   if (pcbddc->dbg_flag) {
4375     /* waits until all processes reaches this point */
4376     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4377     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4378     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4379   }
4380 
4381   /* free memory */
4382   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4383   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #undef __FUNCT__
4388 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4389 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4390 {
4391   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4392   PC_IS*         pcis = (PC_IS*)pc->data;
4393   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4394   PetscInt       i,coarse_size;
4395   PetscInt       *local_primal_indices;
4396   PetscErrorCode ierr;
4397 
4398   PetscFunctionBegin;
4399   /* Compute global number of coarse dofs */
4400   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4401     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4402   }
4403   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4404 
4405   /* check numbering */
4406   if (pcbddc->dbg_flag) {
4407     PetscScalar coarsesum,*array;
4408     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4409 
4410     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4411     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4412     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4413     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4414     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4415     for (i=0;i<pcbddc->local_primal_size;i++) {
4416       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4417     }
4418     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4419     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4420     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4421     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4422     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4423     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4424     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4425     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4426     for (i=0;i<pcis->n;i++) {
4427       if (array[i] == 1.0) {
4428         set_error = PETSC_TRUE;
4429         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4430       }
4431     }
4432     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4433     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4434     for (i=0;i<pcis->n;i++) {
4435       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4436     }
4437     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4438     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4439     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4440     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4441     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4442     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4443     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4444       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4445       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4446       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4447       for (i=0;i<pcbddc->local_primal_size;i++) {
4448         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4449       }
4450       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4451     }
4452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4453     if (set_error_reduced) {
4454       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4455     }
4456   }
4457   /* get back data */
4458   *coarse_size_n = coarse_size;
4459   *local_primal_indices_n = local_primal_indices;
4460   PetscFunctionReturn(0);
4461 }
4462 
4463 #undef __FUNCT__
4464 #define __FUNCT__ "PCBDDCGlobalToLocal"
4465 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4466 {
4467   IS             localis_t;
4468   PetscInt       i,lsize,*idxs,n;
4469   PetscScalar    *vals;
4470   PetscErrorCode ierr;
4471 
4472   PetscFunctionBegin;
4473   /* get indices in local ordering exploiting local to global map */
4474   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4475   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4476   for (i=0;i<lsize;i++) vals[i] = 1.0;
4477   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4478   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4479   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4480   if (idxs) { /* multilevel guard */
4481     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4482   }
4483   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4484   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4485   ierr = PetscFree(vals);CHKERRQ(ierr);
4486   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4487   /* now compute set in local ordering */
4488   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4489   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4490   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4491   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4492   for (i=0,lsize=0;i<n;i++) {
4493     if (PetscRealPart(vals[i]) > 0.5) {
4494       lsize++;
4495     }
4496   }
4497   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4498   for (i=0,lsize=0;i<n;i++) {
4499     if (PetscRealPart(vals[i]) > 0.5) {
4500       idxs[lsize++] = i;
4501     }
4502   }
4503   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4504   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4505   *localis = localis_t;
4506   PetscFunctionReturn(0);
4507 }
4508 
4509 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4510 #undef __FUNCT__
4511 #define __FUNCT__ "PCBDDCMatMult_Private"
4512 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4513 {
4514   PCBDDCChange_ctx change_ctx;
4515   PetscErrorCode   ierr;
4516 
4517   PetscFunctionBegin;
4518   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4519   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4520   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4521   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 
4525 #undef __FUNCT__
4526 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4527 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4528 {
4529   PCBDDCChange_ctx change_ctx;
4530   PetscErrorCode   ierr;
4531 
4532   PetscFunctionBegin;
4533   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4534   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4535   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4536   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4537   PetscFunctionReturn(0);
4538 }
4539 
4540 #undef __FUNCT__
4541 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4542 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4543 {
4544   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4545   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4546   PetscInt            *used_xadj,*used_adjncy;
4547   PetscBool           free_used_adj;
4548   PetscErrorCode      ierr;
4549 
4550   PetscFunctionBegin;
4551   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4552   free_used_adj = PETSC_FALSE;
4553   if (pcbddc->sub_schurs_layers == -1) {
4554     used_xadj = NULL;
4555     used_adjncy = NULL;
4556   } else {
4557     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4558       used_xadj = pcbddc->mat_graph->xadj;
4559       used_adjncy = pcbddc->mat_graph->adjncy;
4560     } else if (pcbddc->computed_rowadj) {
4561       used_xadj = pcbddc->mat_graph->xadj;
4562       used_adjncy = pcbddc->mat_graph->adjncy;
4563     } else {
4564       Mat            mat_adj;
4565       PetscBool      flg_row=PETSC_TRUE;
4566       const PetscInt *xadj,*adjncy;
4567       PetscInt       nvtxs;
4568 
4569       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4570       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4571       if (!flg_row) {
4572         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4573       }
4574       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4575       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4576       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4577       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4578       if (!flg_row) {
4579         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4580       }
4581       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4582       free_used_adj = PETSC_TRUE;
4583     }
4584   }
4585   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4586 
4587   /* free adjacency */
4588   if (free_used_adj) {
4589     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4590   }
4591   PetscFunctionReturn(0);
4592 }
4593 
4594 #undef __FUNCT__
4595 #define __FUNCT__ "PCBDDCInitSubSchurs"
4596 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4597 {
4598   PC_IS               *pcis=(PC_IS*)pc->data;
4599   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4600   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4601   PCBDDCGraph         graph;
4602   Mat                 S_j;
4603   PetscErrorCode      ierr;
4604 
4605   PetscFunctionBegin;
4606   /* attach interface graph for determining subsets */
4607   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4608     IS verticesIS;
4609 
4610     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4611     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4612     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4613     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4614     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4615     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4616 /*
4617     if (pcbddc->dbg_flag) {
4618       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4619     }
4620 */
4621   } else {
4622     graph = pcbddc->mat_graph;
4623   }
4624 
4625   /* Create Schur complement matrix */
4626   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4627   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4628 
4629   /* sub_schurs init */
4630   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4631   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4632   /* free graph struct */
4633   if (pcbddc->sub_schurs_rebuild) {
4634     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4635   }
4636   PetscFunctionReturn(0);
4637 }
4638