xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 066566057029b00256acb5d674ad5b0859717d23)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscScalar     *Smult,*Seigv;
20   PetscReal       *eigs,thresh;
21   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs,maxneigs_r,cumsubs;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscBool       symmetric;
26   PetscErrorCode  ierr;
27 
28   PetscFunctionBegin;
29   if (!sub_schurs->use_mumps) {
30     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
31   }
32   ierr = MatIsSymmetric(sub_schurs->A,0.0,&symmetric);CHKERRQ(ierr);
33   if (!symmetric) {
34     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Not yet implemented");
35   }
36 
37   /* max size of subsets */
38   mss = 0;
39   for (i=0;i<sub_schurs->n_subs;i++) {
40     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
41       PetscInt subset_size;
42       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
43       mss = PetscMax(mss,subset_size);
44     }
45   }
46 
47   /* min/max and threshold */
48   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
49   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 1;
50   nmax = PetscMax(nmin,nmax);
51   if (pcbddc->adaptive_threshold > 1.0) {
52     thresh = 1.0/pcbddc->adaptive_threshold;
53   } else {
54     thresh = 1.0;
55   }
56 
57   /* allocate lapack workspace */
58   cum = cum2 = 0;
59   maxneigs = 0;
60   for (i=0;i<sub_schurs->n_subs;i++) {
61     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
62       PetscInt n,subset_size;
63       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
64       n = PetscMin(subset_size,nmax);
65       n = PetscMax(n,nmin);
66       cum += subset_size*n;
67       cum2 += n;
68       maxneigs = PetscMax(maxneigs,n);
69     }
70   }
71 
72   if (mss) {
73     if (symmetric) {
74       PetscBLASInt B_itype = 1;
75       PetscBLASInt B_N = mss;
76       PetscScalar  zero = 0.0;
77       PetscScalar  eps = 0.0; /* dlamch? */
78 
79       B_lwork = -1;
80       S = NULL;
81       St = NULL;
82       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
83 #if defined(PETSC_USE_COMPLEX)
84       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
85 #else
86       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
87 #endif
88       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
89       ierr = PetscFPTrapPop();CHKERRQ(ierr);
90     } else {
91         /* TODO */
92     }
93   } else {
94     lwork = 0;
95   }
96 
97   nv = 0;
98   if (sub_schurs->is_Ej_com) { /* complement of subsets, each entry is a vertex */
99     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
100   }
101   ierr = PetscBLASIntCast((PetscInt)lwork,&B_lwork);CHKERRQ(ierr);
102   ierr = PetscMalloc7(mss*mss,&S,mss*mss,&St,mss*mss,&eigv,mss,&eigs,
103                       B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
104 #if defined(PETSC_USE_COMPLEX)
105   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
106 #endif
107   ierr = PetscMalloc2(mss*mss,&Smult,mss*mss,&Seigv);CHKERRQ(ierr);
108   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
109                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
110                       nv+cum,&pcbddc->adaptive_constraints_idxs,
111                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
112   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
113 
114   maxneigs = 0;
115   cum = cum2 = cumarray = 0;
116   if (sub_schurs->is_Ej_com) {
117     const PetscInt *idxs;
118 
119     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
120     for (cum=0;cum<nv;cum++) {
121       pcbddc->adaptive_constraints_n[cum] = 1;
122       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
123       pcbddc->adaptive_constraints_ptrs[cum] = cum;
124       pcbddc->adaptive_constraints_data[cum] = 1.0;
125     }
126     cum2 = cum;
127     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
128   }
129 
130   if (mss) { /* multilevel */
131     if (pcbddc->use_deluxe_scaling) {
132       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
133     } else {
134       ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
135     }
136     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
137   }
138 
139   cumsubs = 0;
140   for (i=0;i<sub_schurs->n_subs;i++) {
141     PetscInt j,subset_size;
142 
143     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
144     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
145       const PetscInt *idxs;
146       PetscScalar    one = 1.0,zero=0.0;
147       PetscBLASInt   B_N;
148 
149       /* S should be copied since we need it for deluxe scaling */
150       if (symmetric) {
151         PetscInt j;
152         for (j=0;j<subset_size;j++) {
153           ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
154         }
155         for (j=0;j<subset_size;j++) {
156           ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
157         }
158       } else {
159         ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
160         ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
161       }
162       /* is always this the right matrix? */
163       ierr = PetscMemcpy(Smult,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
164 
165       /* we could reuse space already allocated when building sum_S_Ej_tilda_all */
166       /* St = Starray+cumarray; */
167 
168       if (symmetric) {
169         PetscBLASInt B_itype = 1;
170         PetscBLASInt B_IL = 1, B_IU;
171         PetscScalar  eps = -1.0; /* dlamch? */
172 
173         /* ask for eigenvalues lower than thresh */
174         PetscPrintf(PETSC_COMM_SELF,"[%d] Computing for sub %d/%d.\n",PetscGlobalRank,i,sub_schurs->n_subs);
175         ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
176         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
177 #if defined(PETSC_USE_COMPLEX)
178         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
179 #else
180         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
181 #endif
182         ierr = PetscFPTrapPop();CHKERRQ(ierr);
183         if (B_ierr) {
184           if (B_ierr < 0 ) {
185             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
186           } else if (B_ierr <= B_N) {
187             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
188           } else {
189             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
190           }
191         }
192 
193         if (B_neigs > nmax) {
194           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, more than required %d.\n",PetscGlobalRank,B_neigs,nmax);
195           B_neigs = nmax;
196         }
197 
198         if (B_neigs < nmin) {
199           PetscBLASInt B_neigs2;
200 
201           B_IL = B_neigs + 1;
202           ierr = PetscBLASIntCast(nmin,&B_IU);CHKERRQ(ierr);
203           PetscPrintf(PETSC_COMM_SELF,"[%d]   found %d eigs, less than required %d. Asking for %d to %d incl (fortran like)\n",PetscGlobalRank,B_neigs,nmin,B_IL,B_IU);
204           if (symmetric) {
205             PetscInt j;
206             for (j=0;j<subset_size;j++) {
207               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
208             }
209             for (j=0;j<subset_size;j++) {
210               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
211             }
212           } else {
213             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
214             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
215           }
216           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
217 #if defined(PETSC_USE_COMPLEX)
218           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
219 #else
220           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
221 #endif
222           ierr = PetscFPTrapPop();CHKERRQ(ierr);
223           B_neigs += B_neigs2;
224         }
225         if (B_ierr) {
226           if (B_ierr < 0 ) {
227             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
228           } else if (B_ierr <= B_N) {
229             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
230           } else {
231             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d principal minor not posdef",(int)B_ierr-B_N-1);
232           }
233         }
234         PetscPrintf(PETSC_COMM_SELF,"[%d]   -> Got %d eigs\n",PetscGlobalRank,B_neigs);
235         for (j=0;j<B_neigs;j++) {
236           if (eigs[j] == 0.0) {
237             PetscPrintf(PETSC_COMM_SELF,"[%d]     Inf\n",PetscGlobalRank);
238           } else {
239             PetscPrintf(PETSC_COMM_SELF,"[%d]     %1.6e\n",PetscGlobalRank,1.0/eigs[j]);
240           }
241         }
242       } else {
243           /* TODO */
244       }
245       maxneigs = PetscMax(B_neigs,maxneigs);
246       pcbddc->adaptive_constraints_n[cumsubs+nv] = B_neigs;
247       cumsubs++;
248 
249       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
250       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&B_N,&B_neigs,&B_N,&one,Smult,&B_N,eigv,&B_N,&zero,Seigv,&B_N));
251       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,Seigv,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
253       for (j=0;j<B_neigs;j++) {
254 #if 0
255         {
256           PetscBLASInt Blas_N,Blas_one = 1.0;
257           PetscScalar norm;
258           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
259           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
260           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
261             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
262           } else {
263             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
264           }
265           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
266         }
267 #endif
268         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
269         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
270         cum2 += subset_size;
271       }
272       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
273     }
274     /* shift for next computation */
275     cumarray += subset_size*subset_size;
276   }
277   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
278   ierr = PetscFree2(Smult,Seigv);CHKERRQ(ierr);
279 
280   if (mss) {
281     if (pcbddc->use_deluxe_scaling) {
282       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
283     } else {
284       ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&Sarray);CHKERRQ(ierr);
285     }
286     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
287   }
288   ierr = PetscFree7(S,St,eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
289 #if defined(PETSC_USE_COMPLEX)
290   ierr = PetscFree(rwork);CHKERRQ(ierr);
291 #endif
292   if (pcbddc->dbg_flag) {
293     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
294     ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
295   }
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNCT__
300 #define __FUNCT__ "PCBDDCSetUpSolvers"
301 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
302 {
303   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
304   PetscScalar    *coarse_submat_vals;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   /* Setup local scatters R_to_B and (optionally) R_to_D */
309   /* PCBDDCSetUpLocalWorkVectors should be called first! */
310   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
311 
312   /* Setup local neumann solver ksp_R */
313   /* PCBDDCSetUpLocalScatters should be called first! */
314   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
315 
316   /* Change global null space passed in by the user if change of basis has been requested */
317   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
318     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
319   }
320 
321   /*
322      Setup local correction and local part of coarse basis.
323      Gives back the dense local part of the coarse matrix in column major ordering
324   */
325   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
326 
327   /* Compute total number of coarse nodes and setup coarse solver */
328   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
329 
330   /* free */
331   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "PCBDDCResetCustomization"
337 PetscErrorCode PCBDDCResetCustomization(PC pc)
338 {
339   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
344   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
345   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
346   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
347   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
348   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
349   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
350   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
351   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
352   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
353   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "PCBDDCResetTopography"
359 PetscErrorCode PCBDDCResetTopography(PC pc)
360 {
361   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
362   PetscErrorCode ierr;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
366   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
367   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
368   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
369   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
370   PetscFunctionReturn(0);
371 }
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "PCBDDCResetSolvers"
375 PetscErrorCode PCBDDCResetSolvers(PC pc)
376 {
377   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
378   PetscScalar    *array;
379   PetscErrorCode ierr;
380 
381   PetscFunctionBegin;
382   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
383   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
384   ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
385   ierr = PetscFree(array);CHKERRQ(ierr);
386   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
387   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
388   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
389   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
390   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
391   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
392   if (pcbddc->local_auxmat2) {
393     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
394     ierr = PetscFree(array);CHKERRQ(ierr);
395   }
396   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
397   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
398   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
399   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
400   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
401   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
402   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
403   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
404   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
405   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
406   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
407   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
408   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
409   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
410   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
411   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
417 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
418 {
419   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
420   PC_IS          *pcis = (PC_IS*)pc->data;
421   VecType        impVecType;
422   PetscInt       n_constraints,n_R,old_size;
423   PetscErrorCode ierr;
424 
425   PetscFunctionBegin;
426   if (!pcbddc->ConstraintMatrix) {
427     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
428   }
429   /* get sizes */
430   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
431   n_R = pcis->n-pcbddc->n_actual_vertices;
432   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
433   /* local work vectors (try to avoid unneeded work)*/
434   /* R nodes */
435   old_size = -1;
436   if (pcbddc->vec1_R) {
437     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
438   }
439   if (n_R != old_size) {
440     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
441     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
442     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
443     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
444     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
445     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
446   }
447   /* local primal dofs */
448   old_size = -1;
449   if (pcbddc->vec1_P) {
450     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
451   }
452   if (pcbddc->local_primal_size != old_size) {
453     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
454     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
455     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
457   }
458   /* local explicit constraints */
459   old_size = -1;
460   if (pcbddc->vec1_C) {
461     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
462   }
463   if (n_constraints && n_constraints != old_size) {
464     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
465     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
466     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
467     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
468   }
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "PCBDDCSetUpCorrection"
474 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
475 {
476   PetscErrorCode         ierr;
477   /* pointers to pcis and pcbddc */
478   PC_IS*                 pcis = (PC_IS*)pc->data;
479   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
480   /* submatrices of local problem */
481   Mat                    A_RV,A_VR,A_VV;
482   /* submatrices of local coarse problem */
483   Mat                    S_VV,S_CV,S_VC,S_CC;
484   /* working matrices */
485   Mat                    C_CR;
486   /* additional working stuff */
487   PC                     pc_R;
488   Mat                    F;
489   PetscBool              isLU,isCHOL,isILU;
490 
491   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
492   PetscScalar            *work;
493   PetscInt               *idx_V_B;
494   PetscInt               n,n_vertices,n_constraints;
495   PetscInt               i,n_R,n_D,n_B;
496   PetscBool              unsymmetric_check;
497   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
498   MatType                impMatType;
499   /* some shortcuts to scalars */
500   PetscScalar            one=1.0,m_one=-1.0;
501 
502   PetscFunctionBegin;
503   /* get number of vertices (corners plus constraints with change of basis)
504      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
505   n_vertices = pcbddc->n_actual_vertices;
506   n_constraints = pcbddc->local_primal_size-n_vertices;
507   /* Set Non-overlapping dimensions */
508   n_B = pcis->n_B; n_D = pcis->n - n_B;
509   n_R = pcis->n-n_vertices;
510 
511   /* Set types for local objects needed by BDDC precondtioner */
512   impMatType = MATSEQDENSE;
513 
514   /* vertices in boundary numbering */
515   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
516   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
517   if (i != n_vertices) {
518     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
519   }
520 
521   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
522   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
523   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
524   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
525   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
526   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
527   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
528   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
529   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
530   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
531 
532   unsymmetric_check = PETSC_FALSE;
533   /* allocate workspace */
534   n = 0;
535   if (n_constraints) {
536     n += n_R*n_constraints;
537   }
538   if (n_vertices) {
539     n = PetscMax(2*n_R*n_vertices,n);
540   }
541   if (!pcbddc->issym) {
542     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
543     unsymmetric_check = PETSC_TRUE;
544   }
545   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
546 
547   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
548   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
549   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
550   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
551   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
552   if (isLU || isILU || isCHOL) {
553     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
554   } else {
555     F = NULL;
556   }
557 
558   /* Precompute stuffs needed for preprocessing and application of BDDC*/
559   if (n_constraints) {
560     Mat M1,M2,M3;
561     IS  is_aux;
562     /* see if we can save some allocations */
563     if (pcbddc->local_auxmat2) {
564       PetscInt on_R,on_constraints;
565       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
566       if (on_R != n_R || on_constraints != n_constraints) {
567         PetscScalar *marray;
568 
569         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
570         ierr = PetscFree(marray);CHKERRQ(ierr);
571         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
572         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
573       }
574     }
575     /* auxiliary matrices */
576     if (!pcbddc->local_auxmat2) {
577       PetscScalar *marray;
578 
579       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
580       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
581       marray += n_R*n_constraints;
582       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
583     }
584 
585     /* Extract constraints on R nodes: C_{CR}  */
586     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
587     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
588     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
589 
590     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
591     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
592     for (i=0;i<n_constraints;i++) {
593       const PetscScalar *row_cmat_values;
594       const PetscInt    *row_cmat_indices;
595       PetscInt          size_of_constraint,j;
596 
597       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
598       for (j=0;j<size_of_constraint;j++) {
599         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
600       }
601       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
602     }
603     if (F) {
604       Mat B;
605 
606       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
607       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
608       ierr = MatDestroy(&B);CHKERRQ(ierr);
609     } else {
610       PetscScalar *xarray;
611       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
612       for (i=0;i<n_constraints;i++) {
613         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
614         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
615         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
616         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
617         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
618       }
619       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
620     }
621 
622     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
623     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
625     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
626     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
627     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
628     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
629     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
630     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
631     ierr = MatDestroy(&M2);CHKERRQ(ierr);
632     ierr = MatDestroy(&M3);CHKERRQ(ierr);
633     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
634     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
635     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
636     ierr = MatDestroy(&M1);CHKERRQ(ierr);
637   }
638 
639   /* Get submatrices from subdomain matrix */
640   if (n_vertices) {
641     Mat       newmat;
642     IS        is_aux;
643     PetscInt  ibs,mbs;
644     PetscBool issbaij;
645 
646     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
647     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
648     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
649     if (ibs != mbs) { /* need to convert to SEQAIJ */
650       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
651       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
652       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
653       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
654       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
655     } else {
656       /* this is safe */
657       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
658       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
659       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
660         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
661         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
662         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
663         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
664       } else {
665         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
666         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
667       }
668     }
669     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
670   }
671 
672   /* Matrix of coarse basis functions (local) */
673   if (pcbddc->coarse_phi_B) {
674     PetscInt on_B,on_primal,on_D=n_D;
675     if (pcbddc->coarse_phi_D) {
676       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
677     }
678     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
679     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
680       PetscScalar *marray;
681 
682       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
683       ierr = PetscFree(marray);CHKERRQ(ierr);
684       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
685       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
686       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
687       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
688     }
689   }
690 
691   if (!pcbddc->coarse_phi_B) {
692     PetscScalar *marray;
693 
694     n = n_B*pcbddc->local_primal_size;
695     if (pcbddc->switch_static || pcbddc->dbg_flag) {
696       n += n_D*pcbddc->local_primal_size;
697     }
698     if (!pcbddc->issym) {
699       n *= 2;
700     }
701     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
702     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
703     n = n_B*pcbddc->local_primal_size;
704     if (pcbddc->switch_static || pcbddc->dbg_flag) {
705       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
706       n += n_D*pcbddc->local_primal_size;
707     }
708     if (!pcbddc->issym) {
709       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
710       if (pcbddc->switch_static) {
711         n = n_B*pcbddc->local_primal_size;
712         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
713       }
714     } else {
715       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
716       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
717       if (pcbddc->switch_static) {
718         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
719         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
720       }
721     }
722   }
723   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
724   /* vertices */
725   if (n_vertices) {
726     Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
727     PetscScalar  *x,*y;
728     PetscBLASInt B_N,B_one = 1;
729 
730     ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
731     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
732     ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
733     if (F) {
734       ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
735     } else {
736       ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
737       for (i=0;i<n_vertices;i++) {
738         ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
739         ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
740         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
741         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
742         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
743       }
744       ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
745     }
746     ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
747     /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
748     if (n_constraints) {
749       Mat B;
750       ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
751       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
752       ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
753       ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
754       ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
755       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
756       ierr = MatDestroy(&B);CHKERRQ(ierr);
757     }
758     ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
759     ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
760     ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
761     ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
762     ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
763     ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
764     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
765     ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
766     ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
767     ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
768     ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
769     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
770     /* coarse basis functions */
771     for (i=0;i<n_vertices;i++) {
772       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
773       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
774       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
775       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
776       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
777       y[n_B*i+idx_V_B[i]] = 1.0;
778       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
779       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
780 
781       if (pcbddc->switch_static || pcbddc->dbg_flag) {
782         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
783         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
784         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
785         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
786         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
787         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
788       }
789       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
790     }
791     ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
792     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
793   }
794 
795   if (n_constraints) {
796     Mat B;
797 
798     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
799     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
800     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
801     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
802     if (n_vertices) {
803       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
804     }
805     ierr = MatDestroy(&B);CHKERRQ(ierr);
806     /* coarse basis functions */
807     for (i=0;i<n_constraints;i++) {
808       PetscScalar *y;
809 
810       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
811       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
812       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
813       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
814       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
816       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
817 
818       if (pcbddc->switch_static || pcbddc->dbg_flag) {
819         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
820         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
821         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
824         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
825       }
826       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
827     }
828   }
829 
830   /* compute other basis functions for non-symmetric problems */
831   if (!pcbddc->issym) {
832     Mat B,X;
833 
834     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
835 
836     if (n_constraints) {
837       Mat S_VCT,S_CCT,B_C;
838 
839       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
840       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
841       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
842       if (n_vertices) {
843         Mat B_V;
844 
845         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
846         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
847         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
848         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
849       }
850       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
851     }
852     if (n_vertices) {
853       Mat          A_VRT;
854       PetscBLASInt B_N,B_one = 1;
855 
856       if (!n_constraints) { /* if there are no constraints, reset work */
857         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
858       }
859       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
860       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
861       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
862       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
863     }
864 
865     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
866     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
867       for (i=0;i<pcbddc->local_primal_size;i++) {
868         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
869         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
870         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
871         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
872         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
873       }
874     } else {
875       for (i=0;i<pcbddc->local_primal_size;i++) {
876         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
877         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
878         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
879         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
880         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
881       }
882     }
883     ierr = MatDestroy(&B);CHKERRQ(ierr);
884     /* coarse basis functions */
885     for (i=0;i<pcbddc->local_primal_size;i++) {
886       PetscScalar *y;
887 
888       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
889       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
890       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
891       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893       if (i<n_vertices) {
894         y[n_B*i+idx_V_B[i]] = 1.0;
895       }
896       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
897       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
898 
899       if (pcbddc->switch_static || pcbddc->dbg_flag) {
900         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
901         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
902         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
903         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
905         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
906       }
907       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
908     }
909     ierr = MatDestroy(&X);CHKERRQ(ierr);
910   }
911 
912   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
913   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
914   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
915   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
916   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
917   /* Checking coarse_sub_mat and coarse basis functios */
918   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
919   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
920   if (pcbddc->dbg_flag) {
921     Mat         coarse_sub_mat;
922     Mat         AUXMAT,TM1,TM2,TM3,TM4;
923     Mat         coarse_phi_D,coarse_phi_B;
924     Mat         coarse_psi_D,coarse_psi_B;
925     Mat         A_II,A_BB,A_IB,A_BI;
926     MatType     checkmattype=MATSEQAIJ;
927     PetscReal   real_value;
928 
929     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
930     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
931     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
932     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
933     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
934     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
935     if (unsymmetric_check) {
936       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
937       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
938     }
939     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
940 
941     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
942     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944     if (unsymmetric_check) {
945       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
946       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
947       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
948       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
949       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
950       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
951       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
952       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
953       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
954       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
955       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
956       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
957     } else {
958       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
959       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
960       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
961       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
962       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
963       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
964       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
965       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
966     }
967     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
968     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
969     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
970     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
971     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
972     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
973 
974     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
975     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
976     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
977     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
978     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
979     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
980     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
981     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
982     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
983     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
984     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
985     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
986     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
987     if (unsymmetric_check) {
988       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
989       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
990     }
991     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
992   }
993 
994   /* free memory */
995   ierr = PetscFree(work);CHKERRQ(ierr);
996   if (n_vertices) {
997     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
998   }
999   if (n_constraints) {
1000     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1001   }
1002   /* get back data */
1003   *coarse_submat_vals_n = coarse_submat_vals;
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1009 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, MatStructure reuse, Mat* B)
1010 {
1011   Mat            *work_mat;
1012   IS             isrow_s,iscol_s;
1013   PetscBool      rsorted,csorted;
1014   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1015   PetscErrorCode ierr;
1016 
1017   PetscFunctionBegin;
1018   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1019   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1020   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1021   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1022 
1023   if (!rsorted) {
1024     const PetscInt *idxs;
1025     PetscInt *idxs_sorted,i;
1026 
1027     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1028     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1029     for (i=0;i<rsize;i++) {
1030       idxs_perm_r[i] = i;
1031     }
1032     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1033     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1034     for (i=0;i<rsize;i++) {
1035       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1036     }
1037     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1038     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1039   } else {
1040     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1041     isrow_s = isrow;
1042   }
1043 
1044   if (!csorted) {
1045     if (isrow == iscol) {
1046       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1047       iscol_s = isrow_s;
1048     } else {
1049       const PetscInt *idxs;
1050       PetscInt *idxs_sorted,i;
1051 
1052       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1053       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1054       for (i=0;i<csize;i++) {
1055         idxs_perm_c[i] = i;
1056       }
1057       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1058       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1059       for (i=0;i<csize;i++) {
1060         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1061       }
1062       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1063       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1064     }
1065   } else {
1066     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1067     iscol_s = iscol;
1068   }
1069 
1070   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,reuse,&work_mat);CHKERRQ(ierr);
1071 
1072   if (!rsorted || !csorted) {
1073     Mat      new_mat;
1074     IS       is_perm_r,is_perm_c;
1075 
1076     if (!rsorted) {
1077       PetscInt *idxs_r,i;
1078       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1079       for (i=0;i<rsize;i++) {
1080         idxs_r[idxs_perm_r[i]] = i;
1081       }
1082       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1083       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1084     } else {
1085       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1086     }
1087     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1088 
1089     if (!csorted) {
1090       if (isrow_s == iscol_s) {
1091         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1092         is_perm_c = is_perm_r;
1093       } else {
1094         PetscInt *idxs_c,i;
1095         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1096         for (i=0;i<csize;i++) {
1097           idxs_c[idxs_perm_c[i]] = i;
1098         }
1099         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1100         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1101       }
1102     } else {
1103       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1104     }
1105     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1106 
1107     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1108     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1109     work_mat[0] = new_mat;
1110     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1111     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1112   }
1113 
1114   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1115   *B = work_mat[0];
1116   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1117   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1118   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1124 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1125 {
1126   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1127   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1128   Mat            new_mat;
1129   IS             is_local,is_global;
1130   PetscInt       local_size;
1131   PetscBool      isseqaij;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1136   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1137   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1138   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1139   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1140   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,MAT_INITIAL_MATRIX,&new_mat);CHKERRQ(ierr);
1141   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1142 
1143   /* check */
1144   if (pcbddc->dbg_flag) {
1145     Vec       x,x_change;
1146     PetscReal error;
1147 
1148     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1149     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1150     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1151     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1152     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1153     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1154     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1155     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1156     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1157     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1158     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1159     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1160     ierr = VecDestroy(&x);CHKERRQ(ierr);
1161     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1162   }
1163 
1164   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1165   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1166   if (isseqaij) {
1167     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1168   } else {
1169     Mat work_mat;
1170     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1171     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1173   }
1174   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1175   /*
1176   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1177   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1178   */
1179   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1185 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1186 {
1187   PC_IS*         pcis = (PC_IS*)(pc->data);
1188   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1189   IS             is_aux1,is_aux2;
1190   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1191   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1192   PetscInt       vbs,bs;
1193   PetscBT        bitmask;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   /*
1198     No need to setup local scatters if
1199       - primal space is unchanged
1200         AND
1201       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1202         AND
1203       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1204   */
1205   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1206     PetscFunctionReturn(0);
1207   }
1208   /* destroy old objects */
1209   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1210   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1211   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1212   /* Set Non-overlapping dimensions */
1213   n_B = pcis->n_B; n_D = pcis->n - n_B;
1214   n_vertices = pcbddc->n_actual_vertices;
1215   /* create auxiliary bitmask */
1216   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1217   for (i=0;i<n_vertices;i++) {
1218     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1219   }
1220 
1221   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1222   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1223   for (i=0, n_R=0; i<pcis->n; i++) {
1224     if (!PetscBTLookup(bitmask,i)) {
1225       idx_R_local[n_R] = i;
1226       n_R++;
1227     }
1228   }
1229 
1230   /* Block code */
1231   vbs = 1;
1232   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1233   if (bs>1 && !(n_vertices%bs)) {
1234     PetscBool is_blocked = PETSC_TRUE;
1235     PetscInt  *vary;
1236     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1237     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1238     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1239     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1240     for (i=0; i<n_vertices; i++) {
1241       if (vary[i]!=0 && vary[i]!=bs) {
1242         is_blocked = PETSC_FALSE;
1243         break;
1244       }
1245     }
1246     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1247       vbs = bs;
1248       for (i=0;i<n_R/vbs;i++) {
1249         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1250       }
1251     }
1252     ierr = PetscFree(vary);CHKERRQ(ierr);
1253   }
1254   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1255   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1256 
1257   /* print some info if requested */
1258   if (pcbddc->dbg_flag) {
1259     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1260     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1261     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1262     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1266     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1267   }
1268 
1269   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1270   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1271   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1272   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1273   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1274   for (i=0; i<n_D; i++) {
1275     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1276   }
1277   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1278   for (i=0, j=0; i<n_R; i++) {
1279     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1280       aux_array1[j++] = i;
1281     }
1282   }
1283   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1284   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1285   for (i=0, j=0; i<n_B; i++) {
1286     if (!PetscBTLookup(bitmask,is_indices[i])) {
1287       aux_array2[j++] = i;
1288     }
1289   }
1290   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1291   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1292   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1293   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1294   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1295 
1296   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1297     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1298     for (i=0, j=0; i<n_R; i++) {
1299       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1300         aux_array1[j++] = i;
1301       }
1302     }
1303     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1304     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1305     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1306   }
1307   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1308   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1315 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1316 {
1317   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1318   PC_IS          *pcis = (PC_IS*)pc->data;
1319   PC             pc_temp;
1320   Mat            A_RR;
1321   MatReuse       reuse;
1322   PetscScalar    m_one = -1.0;
1323   PetscReal      value;
1324   PetscInt       n_D,n_R,ibs,mbs;
1325   PetscBool      use_exact,use_exact_reduced,issbaij;
1326   PetscErrorCode ierr;
1327   /* prefixes stuff */
1328   char           dir_prefix[256],neu_prefix[256],str_level[16];
1329   size_t         len;
1330 
1331   PetscFunctionBegin;
1332 
1333   /* compute prefixes */
1334   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1335   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1336   if (!pcbddc->current_level) {
1337     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1338     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1339     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1340     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1343     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1344     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1345     len -= 15; /* remove "pc_bddc_coarse_" */
1346     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1347     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1348     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1349     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1350     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1351     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1352     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1353     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1354   }
1355 
1356   /* DIRICHLET PROBLEM */
1357   if (dirichlet) {
1358     /* Matrix for Dirichlet problem is pcis->A_II */
1359     ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1360     if (!pcbddc->ksp_D) { /* create object if not yet build */
1361       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1362       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1363       /* default */
1364       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1365       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1366       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1367       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1368       if (issbaij) {
1369         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1370       } else {
1371         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1372       }
1373       /* Allow user's customization */
1374       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1375       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1376     }
1377     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1378     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1379     if (!n_D) {
1380       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1381       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1382     }
1383     /* Set Up KSP for Dirichlet problem of BDDC */
1384     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1385     /* set ksp_D into pcis data */
1386     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1387     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1388     pcis->ksp_D = pcbddc->ksp_D;
1389   }
1390 
1391   /* NEUMANN PROBLEM */
1392   A_RR = 0;
1393   if (neumann) {
1394     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1395     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1396     if (pcbddc->ksp_R) { /* already created ksp */
1397       PetscInt nn_R;
1398       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1399       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1400       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1401       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1402         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1403         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1404         reuse = MAT_INITIAL_MATRIX;
1405       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1406         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1407           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1408           reuse = MAT_INITIAL_MATRIX;
1409         } else { /* safe to reuse the matrix */
1410           reuse = MAT_REUSE_MATRIX;
1411         }
1412       }
1413       /* last check */
1414       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1415         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1416         reuse = MAT_INITIAL_MATRIX;
1417       }
1418     } else { /* first time, so we need to create the matrix */
1419       reuse = MAT_INITIAL_MATRIX;
1420     }
1421     /* extract A_RR */
1422     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1423     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1424     if (ibs != mbs) {
1425       Mat newmat;
1426       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1427       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1428       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1429     } else {
1430       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1431     }
1432     if (!pcbddc->ksp_R) { /* create object if not present */
1433       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1434       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1435       /* default */
1436       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1437       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1439       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1440       if (issbaij) {
1441         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1442       } else {
1443         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1444       }
1445       /* Allow user's customization */
1446       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1447       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1448     }
1449     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1450     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1451     if (!n_R) {
1452       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1453       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1454     }
1455     /* Set Up KSP for Neumann problem of BDDC */
1456     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1457   }
1458 
1459   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1460   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1461     if (pcbddc->dbg_flag) {
1462       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1464       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1465     }
1466     if (dirichlet) { /* Dirichlet */
1467       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1468       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1469       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1470       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1471       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1472       /* need to be adapted? */
1473       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1474       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1475       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1476       /* print info */
1477       if (pcbddc->dbg_flag) {
1478         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1479         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1480       }
1481       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1482         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1483       }
1484     }
1485     if (neumann) { /* Neumann */
1486       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1487       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1488       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1489       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1490       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1491       /* need to be adapted? */
1492       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1493       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1494       /* print info */
1495       if (pcbddc->dbg_flag) {
1496         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1497         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1498       }
1499       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1500         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1501       }
1502     }
1503   }
1504   /* free Neumann problem's matrix */
1505   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1511 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1512 {
1513   PetscErrorCode ierr;
1514   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1515 
1516   PetscFunctionBegin;
1517   if (applytranspose) {
1518     if (pcbddc->local_auxmat1) {
1519       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1520       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1521     }
1522     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1523   } else {
1524     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1525     if (pcbddc->local_auxmat1) {
1526       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1527       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1528     }
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1534 #undef __FUNCT__
1535 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1536 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1537 {
1538   PetscErrorCode ierr;
1539   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1540   PC_IS*            pcis = (PC_IS*)  (pc->data);
1541   const PetscScalar zero = 0.0;
1542 
1543   PetscFunctionBegin;
1544   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1545   if (applytranspose) {
1546     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1547     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1548   } else {
1549     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1550     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1551   }
1552   /* start communications from local primal nodes to rhs of coarse solver */
1553   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1554   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1556 
1557   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1558   /* TODO remove null space when doing multilevel */
1559   if (pcbddc->coarse_ksp) {
1560     if (applytranspose) {
1561       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1562     } else {
1563       ierr = KSPSolve(pcbddc->coarse_ksp,NULL,NULL);CHKERRQ(ierr);
1564     }
1565   }
1566 
1567   /* Local solution on R nodes */
1568   if (pcis->n) {
1569     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1570     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1571     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1572     if (pcbddc->switch_static) {
1573       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1574       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1575     }
1576     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1577     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1578     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1579     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1580     if (pcbddc->switch_static) {
1581       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1582       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1583     }
1584   }
1585 
1586   /* communications from coarse sol to local primal nodes */
1587   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1588   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1589 
1590   /* Sum contributions from two levels */
1591   if (applytranspose) {
1592     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1593     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1594   } else {
1595     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1596     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 /* TODO: the following two function can be optimized using VecPlaceArray whenever possible and using overlap flag */
1602 #undef __FUNCT__
1603 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1604 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1605 {
1606   PetscErrorCode ierr;
1607   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1608   PetscScalar    *array,*array2;
1609   Vec            from,to;
1610 
1611   PetscFunctionBegin;
1612   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1613     from = pcbddc->coarse_vec;
1614     to = pcbddc->vec1_P;
1615     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1616       Vec tvec;
1617       PetscInt lsize;
1618       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1619       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1620       ierr = VecGetArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1621       ierr = VecGetArray(from,&array2);CHKERRQ(ierr);
1622       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1623       ierr = VecRestoreArrayRead(tvec,(const PetscScalar**)&array);CHKERRQ(ierr);
1624       ierr = VecRestoreArray(from,&array2);CHKERRQ(ierr);
1625     }
1626   } else { /* from local to global -> put data in coarse right hand side */
1627     from = pcbddc->vec1_P;
1628     to = pcbddc->coarse_vec;
1629   }
1630   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1636 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1637 {
1638   PetscErrorCode ierr;
1639   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1640   PetscScalar    *array,*array2;
1641   Vec            from,to;
1642 
1643   PetscFunctionBegin;
1644   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1645     from = pcbddc->coarse_vec;
1646     to = pcbddc->vec1_P;
1647   } else { /* from local to global -> put data in coarse right hand side */
1648     from = pcbddc->vec1_P;
1649     to = pcbddc->coarse_vec;
1650   }
1651   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1652   if (smode == SCATTER_FORWARD) {
1653     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1654       Vec tvec;
1655       PetscInt lsize;
1656       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1657       ierr = VecGetLocalSize(tvec,&lsize);CHKERRQ(ierr);
1658       ierr = VecGetArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1659       ierr = VecGetArray(tvec,&array2);CHKERRQ(ierr);
1660       ierr = PetscMemcpy(array2,array,lsize*sizeof(PetscScalar));CHKERRQ(ierr);
1661       ierr = VecRestoreArrayRead(to,(const PetscScalar**)&array);CHKERRQ(ierr);
1662       ierr = VecRestoreArray(tvec,&array2);CHKERRQ(ierr);
1663     }
1664   }
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 /* uncomment for testing purposes */
1669 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1670 #undef __FUNCT__
1671 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1672 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1673 {
1674   PetscErrorCode    ierr;
1675   PC_IS*            pcis = (PC_IS*)(pc->data);
1676   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1677   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1678   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1679   MatType           impMatType=MATSEQAIJ;
1680   /* one and zero */
1681   PetscScalar       one=1.0,zero=0.0;
1682   /* space to store constraints and their local indices */
1683   PetscScalar       *temp_quadrature_constraint;
1684   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1685   /* iterators */
1686   PetscInt          i,j,k,total_counts,temp_start_ptr;
1687   /* stuff to store connected components stored in pcbddc->mat_graph */
1688   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1689   PetscInt          n_ISForFaces,n_ISForEdges;
1690   /* near null space stuff */
1691   MatNullSpace      nearnullsp;
1692   const Vec         *nearnullvecs;
1693   Vec               *localnearnullsp;
1694   PetscBool         nnsp_has_cnst;
1695   PetscInt          nnsp_size;
1696   PetscScalar       *array;
1697   /* BLAS integers */
1698   PetscBLASInt      lwork,lierr;
1699   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1700   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1701   /* LAPACK working arrays for SVD or POD */
1702   PetscBool         skip_lapack;
1703   PetscScalar       *work;
1704   PetscReal         *singular_vals;
1705 #if defined(PETSC_USE_COMPLEX)
1706   PetscReal         *rwork;
1707 #endif
1708 #if defined(PETSC_MISSING_LAPACK_GESVD)
1709   PetscBLASInt      Blas_one_2=1;
1710   PetscScalar       *temp_basis,*correlation_mat;
1711 #else
1712   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1713   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1714 #endif
1715   /* reuse */
1716   PetscInt          olocal_primal_size;
1717   PetscInt          *oprimal_indices_local_idxs;
1718   /* change of basis */
1719   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1720   PetscBool         boolforchange,qr_needed;
1721   PetscBT           touched,change_basis,qr_needed_idx;
1722   /* auxiliary stuff */
1723   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1724   PetscInt          ncc,*gidxs,*permutation,*temp_indices_to_constraint_work;
1725   PetscScalar       *temp_quadrature_constraint_work;
1726   /* some quantities */
1727   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1728   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1729 
1730 
1731   PetscFunctionBegin;
1732   /* Destroy Mat objects computed previously */
1733   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1734   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1735   /* TODO synch with adaptive selection */
1736   /* Get index sets for faces, edges and vertices from graph */
1737   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1738   /* free unneeded index sets */
1739   if (!pcbddc->use_vertices) {
1740     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1741   }
1742   if (!pcbddc->use_edges) {
1743     for (i=0;i<n_ISForEdges;i++) {
1744       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1745     }
1746     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1747     n_ISForEdges = 0;
1748   }
1749   if (!pcbddc->use_faces) {
1750     for (i=0;i<n_ISForFaces;i++) {
1751       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1752     }
1753     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1754     n_ISForFaces = 0;
1755   }
1756   /* HACKS (the following two blocks of code) */
1757   if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1758     pcbddc->use_change_of_basis = PETSC_TRUE;
1759     if (!ISForEdges) {
1760       pcbddc->use_change_on_faces = PETSC_TRUE;
1761     }
1762   }
1763   if (pcbddc->NullSpace) {
1764     /* use_change_of_basis should be consistent among processors */
1765     PetscBool tbool[2],gbool[2];
1766     tbool [0] = pcbddc->use_change_of_basis;
1767     tbool [1] = pcbddc->use_change_on_faces;
1768     ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1769     pcbddc->use_change_of_basis = gbool[0];
1770     pcbddc->use_change_on_faces = gbool[1];
1771   }
1772   /* print some info */
1773   if (pcbddc->dbg_flag) {
1774     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1775     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1776     i = 0;
1777     if (ISForVertices) {
1778       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1779     }
1780     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1781     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1782     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1783     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1784   }
1785 
1786   if (!pcbddc->adaptive_selection) {
1787     /* check if near null space is attached to global mat */
1788     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1789     if (nearnullsp) {
1790       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1791       /* remove any stored info */
1792       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1793       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1794       /* store information for BDDC solver reuse */
1795       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1796       pcbddc->onearnullspace = nearnullsp;
1797       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1798       for (i=0;i<nnsp_size;i++) {
1799         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1800       }
1801     } else { /* if near null space is not provided BDDC uses constants by default */
1802       nnsp_size = 0;
1803       nnsp_has_cnst = PETSC_TRUE;
1804     }
1805     /* get max number of constraints on a single cc */
1806     max_constraints = nnsp_size;
1807     if (nnsp_has_cnst) max_constraints++;
1808 
1809     /*
1810          Evaluate maximum storage size needed by the procedure
1811          - temp_indices will contain start index of each constraint stored as follows
1812          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1813          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1814          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1815                                                                                                                                                            */
1816     total_counts = n_ISForFaces+n_ISForEdges;
1817     total_counts *= max_constraints;
1818     n_vertices = 0;
1819     if (ISForVertices) {
1820       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1821     }
1822     total_counts += n_vertices;
1823     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1824     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1825     total_counts = 0;
1826     max_size_of_constraint = 0;
1827     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1828       if (i<n_ISForEdges) {
1829         used_IS = &ISForEdges[i];
1830       } else {
1831         used_IS = &ISForFaces[i-n_ISForEdges];
1832       }
1833       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1834       total_counts += j;
1835       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1836     }
1837     total_counts *= max_constraints;
1838     total_counts += n_vertices;
1839     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1840     /* get local part of global near null space vectors */
1841     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1842     for (k=0;k<nnsp_size;k++) {
1843       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1844       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1845       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1846     }
1847 
1848     /* whether or not to skip lapack calls */
1849     skip_lapack = PETSC_TRUE;
1850     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1851 
1852     /* allocate some auxiliary stuff */
1853     if (!skip_lapack || pcbddc->use_qr_single) {
1854       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1855     }
1856 
1857     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1858     if (!skip_lapack) {
1859       PetscScalar temp_work;
1860 
1861 #if defined(PETSC_MISSING_LAPACK_GESVD)
1862       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1863       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1864       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1865       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1866 #if defined(PETSC_USE_COMPLEX)
1867       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1868 #endif
1869       /* now we evaluate the optimal workspace using query with lwork=-1 */
1870       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1871       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1872       lwork = -1;
1873       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1874 #if !defined(PETSC_USE_COMPLEX)
1875       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1876 #else
1877       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1878 #endif
1879       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1880       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1881 #else /* on missing GESVD */
1882       /* SVD */
1883       PetscInt max_n,min_n;
1884       max_n = max_size_of_constraint;
1885       min_n = max_constraints;
1886       if (max_size_of_constraint < max_constraints) {
1887         min_n = max_size_of_constraint;
1888         max_n = max_constraints;
1889       }
1890       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1891 #if defined(PETSC_USE_COMPLEX)
1892       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1893 #endif
1894       /* now we evaluate the optimal workspace using query with lwork=-1 */
1895       lwork = -1;
1896       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1897       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1898       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1899       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1900 #if !defined(PETSC_USE_COMPLEX)
1901       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1902 #else
1903       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1904 #endif
1905       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1906       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1907 #endif /* on missing GESVD */
1908       /* Allocate optimal workspace */
1909       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1910       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1911     }
1912     /* Now we can loop on constraining sets */
1913     total_counts = 0;
1914     temp_indices[0] = 0;
1915     /* vertices */
1916     if (ISForVertices) {
1917       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1918       if (nnsp_has_cnst) { /* consider all vertices */
1919         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1920         for (i=0;i<n_vertices;i++) {
1921           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1922           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1923           total_counts++;
1924         }
1925       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1926         PetscBool used_vertex;
1927         for (i=0;i<n_vertices;i++) {
1928           used_vertex = PETSC_FALSE;
1929           k = 0;
1930           while (!used_vertex && k<nnsp_size) {
1931             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1932             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1933               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1934               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1935               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1936               total_counts++;
1937               used_vertex = PETSC_TRUE;
1938             }
1939             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1940             k++;
1941           }
1942         }
1943       }
1944       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1945       n_vertices = total_counts;
1946     }
1947 
1948     /* edges and faces */
1949     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
1950       if (ncc<n_ISForEdges) {
1951         used_IS = &ISForEdges[ncc];
1952         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1953       } else {
1954         used_IS = &ISForFaces[ncc-n_ISForEdges];
1955         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1956       }
1957       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1958       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1959       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1960       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1961       /* change of basis should not be performed on local periodic nodes */
1962       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1963       if (nnsp_has_cnst) {
1964         PetscScalar quad_value;
1965         temp_constraints++;
1966         if (!pcbddc->use_nnsp_true) {
1967           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1968         } else {
1969           quad_value = 1.0;
1970         }
1971         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1972         for (j=0;j<size_of_constraint;j++) {
1973           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1974         }
1975         /* sort by global ordering if using lapack subroutines (not needed!) */
1976         if (!skip_lapack || pcbddc->use_qr_single) {
1977           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
1978           for (j=0;j<size_of_constraint;j++) {
1979             permutation[j]=j;
1980           }
1981           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
1982           for (j=0;j<size_of_constraint;j++) {
1983             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
1984           }
1985           for (j=0;j<size_of_constraint;j++) {
1986             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
1987             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
1988           }
1989           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1990           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
1991         }
1992         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1993         total_counts++;
1994       }
1995       for (k=0;k<nnsp_size;k++) {
1996         PetscReal real_value;
1997         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1998         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1999         for (j=0;j<size_of_constraint;j++) {
2000           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2001         }
2002         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2003         /* check if array is null on the connected component */
2004         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2005         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2006         if (real_value > 0.0) { /* keep indices and values */
2007           /* sort by global ordering if using lapack subroutines */
2008           if (!skip_lapack || pcbddc->use_qr_single) {
2009             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2010             for (j=0;j<size_of_constraint;j++) {
2011               permutation[j]=j;
2012             }
2013             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2014             for (j=0;j<size_of_constraint;j++) {
2015               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2016               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2017             }
2018             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2019             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2020           }
2021           temp_constraints++;
2022           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2023           total_counts++;
2024         }
2025       }
2026       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2027       valid_constraints = temp_constraints;
2028       if (!pcbddc->use_nnsp_true && temp_constraints) {
2029         if (temp_constraints == 1) { /* just normalize the constraint */
2030           PetscScalar norm;
2031           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2032           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2033           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2034           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2035         } else { /* perform SVD */
2036           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2037 
2038 #if defined(PETSC_MISSING_LAPACK_GESVD)
2039           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2040              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2041              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2042                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2043                 from that computed using LAPACKgesvd
2044              -> This is due to a different computation of eigenvectors in LAPACKheev
2045              -> The quality of the POD-computed basis will be the same */
2046           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2047           /* Store upper triangular part of correlation matrix */
2048           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2049           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2050           for (j=0;j<temp_constraints;j++) {
2051             for (k=0;k<j+1;k++) {
2052               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
2053             }
2054           }
2055           /* compute eigenvalues and eigenvectors of correlation matrix */
2056           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2057           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2058 #if !defined(PETSC_USE_COMPLEX)
2059           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2060 #else
2061           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2062 #endif
2063           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2064           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2065           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2066           j = 0;
2067           while (j < temp_constraints && singular_vals[j] < tol) j++;
2068           total_counts = total_counts-j;
2069           valid_constraints = temp_constraints-j;
2070           /* scale and copy POD basis into used quadrature memory */
2071           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2072           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2073           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2074           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2075           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2076           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2077           if (j<temp_constraints) {
2078             PetscInt ii;
2079             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2080             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2081             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2082             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2083             for (k=0;k<temp_constraints-j;k++) {
2084               for (ii=0;ii<size_of_constraint;ii++) {
2085                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2086               }
2087             }
2088           }
2089 #else  /* on missing GESVD */
2090           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2091           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2092           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2093           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2094 #if !defined(PETSC_USE_COMPLEX)
2095           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
2096 #else
2097           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
2098 #endif
2099           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2100           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2101           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2102           k = temp_constraints;
2103           if (k > size_of_constraint) k = size_of_constraint;
2104           j = 0;
2105           while (j < k && singular_vals[k-j-1] < tol) j++;
2106           valid_constraints = k-j;
2107           total_counts = total_counts-temp_constraints+valid_constraints;
2108 #endif /* on missing GESVD */
2109         }
2110       }
2111       /* setting change_of_basis flag is safe now */
2112       if (boolforchange) {
2113         for (j=0;j<valid_constraints;j++) {
2114           PetscBTSet(change_basis,total_counts-j-1);
2115         }
2116       }
2117     }
2118     /* free workspace */
2119     if (!skip_lapack || pcbddc->use_qr_single) {
2120       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2121     }
2122     if (!skip_lapack) {
2123       ierr = PetscFree(work);CHKERRQ(ierr);
2124 #if defined(PETSC_USE_COMPLEX)
2125       ierr = PetscFree(rwork);CHKERRQ(ierr);
2126 #endif
2127       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2128 #if defined(PETSC_MISSING_LAPACK_GESVD)
2129       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2130       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2131 #endif
2132     }
2133     for (k=0;k<nnsp_size;k++) {
2134       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2135     }
2136     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2137   } else {
2138     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2139     PetscInt        cum = 0;
2140 
2141     total_counts = 0;
2142     n_vertices = 0;
2143     if (sub_schurs->is_Ej_com) {
2144       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2145     }
2146     max_constraints = 0;
2147     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2148       total_counts += pcbddc->adaptive_constraints_n[i];
2149       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2150     }
2151 
2152     temp_indices = pcbddc->adaptive_constraints_ptrs;
2153     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2154     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2155 
2156 #if 0
2157     printf("Found %d totals\n",total_counts);
2158     for (i=0;i<total_counts;i++) {
2159       printf("const %d, start %d",i,temp_indices[i]);
2160       printf(" end %d:\n",temp_indices[i+1]);
2161       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2162         printf("  idxs %d",temp_indices_to_constraint[j]);
2163         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2164       }
2165     }
2166 #endif
2167 
2168     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2169     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2170     /* Change of basis */
2171     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2172     cum = 0;
2173     if (pcbddc->use_change_of_basis) {
2174       for (i=0;i<sub_schurs->n_subs;i++) {
2175         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2176           for (j=0;j<pcbddc->adaptive_constraints_n[i];j++) {
2177             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2178           }
2179         }
2180         cum += pcbddc->adaptive_constraints_n[i];
2181       }
2182     }
2183   }
2184 
2185   /* free index sets of faces, edges and vertices */
2186   for (i=0;i<n_ISForFaces;i++) {
2187     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2188   }
2189   if (n_ISForFaces) {
2190     ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2191   }
2192   for (i=0;i<n_ISForEdges;i++) {
2193     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2194   }
2195   if (n_ISForEdges) {
2196     ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2197   }
2198   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2199 
2200   /* map temp_indices_to_constraint in boundary numbering */
2201   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2202   if (i != temp_indices[total_counts]) {
2203     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2204   }
2205 
2206   /* set quantities in pcbddc data structure and store previous primal size */
2207   /* n_vertices defines the number of subdomain corners in the primal space */
2208   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2209   olocal_primal_size = pcbddc->local_primal_size;
2210   pcbddc->local_primal_size = total_counts;
2211   pcbddc->n_vertices = n_vertices;
2212   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2213 
2214   /* Create constraint matrix */
2215   /* The constraint matrix is used to compute the l2g map of primal dofs */
2216   /* so we need to set it up properly either with or without change of basis */
2217   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2218   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
2219   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2220   /* array to compute a local numbering of constraints : vertices first then constraints */
2221   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2222   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2223   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2224   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2225   /* auxiliary stuff for basis change */
2226   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2227   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2228 
2229   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2230   total_primal_vertices=0;
2231   for (i=0;i<pcbddc->local_primal_size;i++) {
2232     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2233     if (size_of_constraint == 1) {
2234       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2235       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2236       aux_primal_minloc[total_primal_vertices]=0;
2237       total_primal_vertices++;
2238     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2239       PetscInt min_loc,min_index;
2240       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2241       /* find first untouched local node */
2242       k = 0;
2243       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2244       min_index = global_indices[k];
2245       min_loc = k;
2246       /* search the minimum among global nodes already untouched on the cc */
2247       for (k=1;k<size_of_constraint;k++) {
2248         /* there can be more than one constraint on a single connected component */
2249         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2250           min_index = global_indices[k];
2251           min_loc = k;
2252         }
2253       }
2254       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2255       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2256       aux_primal_minloc[total_primal_vertices]=min_loc;
2257       total_primal_vertices++;
2258     }
2259   }
2260   /* determine if a QR strategy is needed for change of basis */
2261   qr_needed = PETSC_FALSE;
2262   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2263   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2264     if (PetscBTLookup(change_basis,i)) {
2265       if (!pcbddc->use_qr_single) {
2266         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2267         j = 0;
2268         for (k=0;k<size_of_constraint;k++) {
2269           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2270             j++;
2271           }
2272         }
2273         /* found more than one primal dof on the cc */
2274         if (j > 1) {
2275           PetscBTSet(qr_needed_idx,i);
2276           qr_needed = PETSC_TRUE;
2277         }
2278       } else {
2279         PetscBTSet(qr_needed_idx,i);
2280         qr_needed = PETSC_TRUE;
2281       }
2282     }
2283   }
2284   /* free workspace */
2285   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2286 
2287   /* permute indices in order to have a sorted set of vertices */
2288   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2289 
2290   /* nonzero structure of constraint matrix */
2291   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2292   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2293   j=total_primal_vertices;
2294   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2295     if (!PetscBTLookup(change_basis,i)) {
2296       nnz[j]=temp_indices[i+1]-temp_indices[i];
2297       j++;
2298     }
2299   }
2300   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2301   ierr = PetscFree(nnz);CHKERRQ(ierr);
2302   /* set values in constraint matrix */
2303   for (i=0;i<total_primal_vertices;i++) {
2304     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2305   }
2306   total_counts = total_primal_vertices;
2307   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2308     if (!PetscBTLookup(change_basis,i)) {
2309       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2310       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2311       total_counts++;
2312     }
2313   }
2314   /* assembling */
2315   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   /*
2318   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2319   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2320   */
2321   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2322   if (pcbddc->use_change_of_basis) {
2323     /* dual and primal dofs on a single cc */
2324     PetscInt     dual_dofs,primal_dofs;
2325     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2326     PetscInt     primal_counter;
2327     /* working stuff for GEQRF */
2328     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2329     PetscBLASInt lqr_work;
2330     /* working stuff for UNGQR */
2331     PetscScalar  *gqr_work,lgqr_work_t;
2332     PetscBLASInt lgqr_work;
2333     /* working stuff for TRTRS */
2334     PetscScalar  *trs_rhs;
2335     PetscBLASInt Blas_NRHS;
2336     /* pointers for values insertion into change of basis matrix */
2337     PetscInt     *start_rows,*start_cols;
2338     PetscScalar  *start_vals;
2339     /* working stuff for values insertion */
2340     PetscBT      is_primal;
2341     /* matrix sizes */
2342     PetscInt     global_size,local_size;
2343     /* work array for nonzeros */
2344     PetscScalar  *nnz_array;
2345     /* temporary change of basis */
2346     Mat          localChangeOfBasisMatrix;
2347     /* auxiliary work for global change of basis */
2348     Vec          nnz_vec;
2349     PetscInt     *idxs_I,*idxs_B,*idxs_all,*d_nnz,*o_nnz;
2350     PetscInt     nvtxs,*xadj,*adjncy,*idxs_mapped;
2351     PetscScalar  *vals;
2352     PetscBool    done;
2353 
2354     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2355     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2356     ierr = MatSetType(localChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
2357     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
2358 
2359     /* nonzeros for local mat */
2360     ierr = PetscMalloc1(pcis->n_B,&nnz);CHKERRQ(ierr);
2361     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
2362     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2363       if (PetscBTLookup(change_basis,i)) {
2364         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2365         if (PetscBTLookup(qr_needed_idx,i)) {
2366           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2367         } else {
2368           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
2369           /* get local primal index on the cc */
2370           j = 0;
2371           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2372           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
2373         }
2374       }
2375     }
2376     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2377     /* Set initial identity in the matrix */
2378     for (i=0;i<pcis->n_B;i++) {
2379       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2380     }
2381 
2382     if (pcbddc->dbg_flag) {
2383       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2384       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2385     }
2386 
2387 
2388     /* Now we loop on the constraints which need a change of basis */
2389     /*
2390        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2391        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2392 
2393        Basic blocks of change of basis matrix T computed by
2394 
2395           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2396 
2397             | 1        0   ...        0         s_1/S |
2398             | 0        1   ...        0         s_2/S |
2399             |              ...                        |
2400             | 0        ...            1     s_{n-1}/S |
2401             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2402 
2403             with S = \sum_{i=1}^n s_i^2
2404             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2405                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2406 
2407           - QR decomposition of constraints otherwise
2408     */
2409     if (qr_needed) {
2410       /* space to store Q */
2411       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2412       /* first we issue queries for optimal work */
2413       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2414       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2415       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2416       lqr_work = -1;
2417       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2418       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2419       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2420       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2421       lgqr_work = -1;
2422       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2423       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2424       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2425       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2426       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2427       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2428       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2429       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2430       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2431       /* array to store scaling factors for reflectors */
2432       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2433       /* array to store rhs and solution of triangular solver */
2434       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2435       /* allocating workspace for check */
2436       if (pcbddc->dbg_flag) {
2437         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&work);CHKERRQ(ierr);
2438       }
2439     }
2440     /* array to store whether a node is primal or not */
2441     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2442     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2443     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2444     if (i != total_primal_vertices) {
2445       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2446     }
2447     for (i=0;i<total_primal_vertices;i++) {
2448       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2449     }
2450     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2451 
2452     /* loop on constraints and see whether or not they need a change of basis and compute it */
2453     /* -> using implicit ordering contained in temp_indices data */
2454     total_counts = pcbddc->n_vertices;
2455     primal_counter = total_counts;
2456     while (total_counts<pcbddc->local_primal_size) {
2457       primal_dofs = 1;
2458       if (PetscBTLookup(change_basis,total_counts)) {
2459         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2460         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
2461           primal_dofs++;
2462         }
2463         /* get constraint info */
2464         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2465         dual_dofs = size_of_constraint-primal_dofs;
2466 
2467         if (pcbddc->dbg_flag) {
2468           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2469         }
2470 
2471         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2472 
2473           /* copy quadrature constraints for change of basis check */
2474           if (pcbddc->dbg_flag) {
2475             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2476           }
2477           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2478           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2479 
2480           /* compute QR decomposition of constraints */
2481           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2482           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2483           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2484           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2485           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2486           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2487           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2488 
2489           /* explictly compute R^-T */
2490           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2491           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2492           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2493           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2494           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2495           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2496           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2497           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2498           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2499           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2500 
2501           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2502           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2503           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2504           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2505           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2507           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2508           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2509           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2510 
2511           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2512              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2513              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2514           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2515           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2516           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2517           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2518           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2519           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2520           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2521           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2522           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2523           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2524 
2525           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2526           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
2527           /* insert cols for primal dofs */
2528           for (j=0;j<primal_dofs;j++) {
2529             start_vals = &qr_basis[j*size_of_constraint];
2530             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2531             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2532           }
2533           /* insert cols for dual dofs */
2534           for (j=0,k=0;j<dual_dofs;k++) {
2535             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2536               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2537               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2538               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2539               j++;
2540             }
2541           }
2542 
2543           /* check change of basis */
2544           if (pcbddc->dbg_flag) {
2545             PetscInt   ii,jj;
2546             PetscBool valid_qr=PETSC_TRUE;
2547             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2548             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2549             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2550             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2551             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2552             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2553             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2554             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2555             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2556             for (jj=0;jj<size_of_constraint;jj++) {
2557               for (ii=0;ii<primal_dofs;ii++) {
2558                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2559                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2560               }
2561             }
2562             if (!valid_qr) {
2563               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2564               for (jj=0;jj<size_of_constraint;jj++) {
2565                 for (ii=0;ii<primal_dofs;ii++) {
2566                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2567                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2568                   }
2569                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2570                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2571                   }
2572                 }
2573               }
2574             } else {
2575               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2576             }
2577           }
2578         } else { /* simple transformation block */
2579           PetscInt    row,col;
2580           PetscScalar val,norm;
2581 
2582           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2583           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2584           for (j=0;j<size_of_constraint;j++) {
2585             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2586             if (!PetscBTLookup(is_primal,row)) {
2587               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2588               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2589               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2590             } else {
2591               for (k=0;k<size_of_constraint;k++) {
2592                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2593                 if (row != col) {
2594                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2595                 } else {
2596                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2597                 }
2598                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2599               }
2600             }
2601           }
2602           if (pcbddc->dbg_flag) {
2603             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2604           }
2605         }
2606         /* increment primal counter */
2607         primal_counter += primal_dofs;
2608       } else {
2609         if (pcbddc->dbg_flag) {
2610           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2611         }
2612       }
2613       /* increment constraint counter total_counts */
2614       total_counts += primal_dofs;
2615     }
2616 
2617     /* free workspace */
2618     if (qr_needed) {
2619       if (pcbddc->dbg_flag) {
2620         ierr = PetscFree(work);CHKERRQ(ierr);
2621       }
2622       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2623       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2624       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2625       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2626       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2627     }
2628     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2629     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2631 
2632     /* assembling of global change of variable */
2633     ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2634     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2635     ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2636     ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2637     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2638     ierr = MatSetLocalToGlobalMapping(pcbddc->ChangeOfBasisMatrix,matis->mapping,matis->mapping);CHKERRQ(ierr);
2639 
2640     /* nonzeros (overestimated) */
2641     ierr = VecDuplicate(pcis->vec1_global,&nnz_vec);CHKERRQ(ierr);
2642     ierr = VecSetLocalToGlobalMapping(nnz_vec,matis->mapping);CHKERRQ(ierr);
2643     ierr = PetscMalloc2(pcis->n,&nnz_array,pcis->n,&idxs_all);CHKERRQ(ierr);
2644     for (i=0;i<pcis->n;i++) {
2645       nnz_array[i] = 1.0;
2646       idxs_all[i] = i;
2647     }
2648     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2649     for (i=0;i<pcis->n_B;i++) {
2650       nnz_array[idxs_B[i]] = nnz[i];
2651     }
2652     if (pcis->n) {
2653       ierr = VecSetValuesLocal(nnz_vec,pcis->n,idxs_all,nnz_array,INSERT_VALUES);CHKERRQ(ierr);
2654     }
2655     ierr = VecAssemblyBegin(nnz_vec);CHKERRQ(ierr);
2656     ierr = VecAssemblyEnd(nnz_vec);CHKERRQ(ierr);
2657     ierr = PetscFree(nnz);CHKERRQ(ierr);
2658     ierr = PetscFree2(nnz_array,idxs_all);CHKERRQ(ierr);
2659     ierr = PetscMalloc2(local_size,&d_nnz,local_size,&o_nnz);CHKERRQ(ierr);
2660     ierr = VecGetArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2661     for (i=0;i<local_size;i++) {
2662       d_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),local_size);
2663       o_nnz[i] = PetscMin((PetscInt)(PetscRealPart(nnz_array[i])),global_size-local_size);
2664     }
2665     ierr = VecRestoreArray(nnz_vec,&nnz_array);CHKERRQ(ierr);
2666     ierr = VecDestroy(&nnz_vec);CHKERRQ(ierr);
2667     ierr = MatMPIAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2668     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2669 
2670     /* Set identity on dirichlet dofs */
2671     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2672     for (i=0;i<pcis->n-pcis->n_B;i++) {
2673       PetscScalar one=1.0;
2674       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,idxs_I+i,1,idxs_I+i,&one,INSERT_VALUES);CHKERRQ(ierr);
2675     }
2676     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&idxs_I);CHKERRQ(ierr);
2677 
2678     /* Set values at interface dofs */
2679     done = PETSC_TRUE;
2680     ierr = MatGetRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2681     if (!done) {
2682       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2683     }
2684     ierr = MatSeqAIJGetArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2685     ierr = PetscMalloc1(xadj[nvtxs],&idxs_mapped);CHKERRQ(ierr);
2686     ierr = ISLocalToGlobalMappingApply(pcis->BtoNmap,xadj[nvtxs],adjncy,idxs_mapped);CHKERRQ(ierr);
2687     for (i=0;i<nvtxs;i++) {
2688       PetscInt    row,*cols,ncols;
2689       PetscScalar *mat_vals;
2690 
2691       row = idxs_B[i];
2692       ncols = xadj[i+1]-xadj[i];
2693       cols = idxs_mapped+xadj[i];
2694       mat_vals = vals+xadj[i];
2695       ierr = MatSetValuesLocal(pcbddc->ChangeOfBasisMatrix,1,&row,ncols,cols,mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2696     }
2697     ierr = MatRestoreRowIJ(localChangeOfBasisMatrix,0,PETSC_FALSE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&done);CHKERRQ(ierr);
2698     if (!done) {
2699       SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2700     }
2701     ierr = MatSeqAIJRestoreArray(localChangeOfBasisMatrix,&vals);CHKERRQ(ierr);
2702     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&idxs_B);CHKERRQ(ierr);
2703     ierr = PetscFree(idxs_mapped);CHKERRQ(ierr);
2704     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706 
2707     /* check */
2708     if (pcbddc->dbg_flag) {
2709       PetscReal error;
2710       Vec       x,x_change;
2711 
2712       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2713       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2714       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2715       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2716       ierr = VecScatterBegin(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717       ierr = VecScatterEnd(pcis->global_to_B,x,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr);
2719       ierr = VecScatterBegin(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2720       ierr = VecScatterEnd(pcis->global_to_B,pcis->vec2_B,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2721       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2722       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2723       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2724       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2725       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on B: %1.6e\n",error);CHKERRQ(ierr);
2726       ierr = VecDestroy(&x);CHKERRQ(ierr);
2727       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2728     }
2729 
2730     /* adapt sub_schurs computed (if any) */
2731     if (pcbddc->use_deluxe_scaling) {
2732       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2733       if (sub_schurs->n_subs_par_g) {
2734         SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Change of basis with deluxe scaling and parallel problems still needs to be implemented");
2735       }
2736       if (sub_schurs->S_Ej_all) {
2737         Mat S_1,S_2,tmat;
2738         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,sub_schurs->is_Ej_all,sub_schurs->is_Ej_all,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr);
2739         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_1);CHKERRQ(ierr);
2740         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2741         sub_schurs->S_Ej_all = S_1;
2742         ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_2);CHKERRQ(ierr);
2743         ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2744         sub_schurs->sum_S_Ej_all = S_2;
2745         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2746       }
2747     }
2748     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2749   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2750     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2751     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2752   }
2753 
2754   /* set up change of basis context */
2755   if (pcbddc->ChangeOfBasisMatrix) {
2756     PCBDDCChange_ctx change_ctx;
2757 
2758     if (!pcbddc->new_global_mat) {
2759       PetscInt global_size,local_size;
2760 
2761       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2762       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2763       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2764       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2765       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2766       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2767       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2768       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2769       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2770     } else {
2771       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2772       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2773       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2774     }
2775     if (!pcbddc->user_ChangeOfBasisMatrix) {
2776       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2777       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2778     } else {
2779       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2780       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2781     }
2782     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2783     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2784     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786   }
2787 
2788   /* get indices in local ordering for vertices and constraints */
2789   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2790     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2791     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2792   }
2793   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2794   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2795   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2796   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2797   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2798   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2799   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2800   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2801   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2802   /* set quantities in PCBDDC data struct */
2803   pcbddc->n_actual_vertices = i;
2804   /* check if a new primal space has been introduced */
2805   pcbddc->new_primal_space_local = PETSC_TRUE;
2806   if (olocal_primal_size == pcbddc->local_primal_size) {
2807     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2808     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2809     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2810   }
2811   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2812   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2813 
2814   /* flush dbg viewer */
2815   if (pcbddc->dbg_flag) {
2816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2817   }
2818 
2819   /* free workspace */
2820   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2821   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2822   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2823   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2824   if (!pcbddc->adaptive_selection) {
2825     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2826     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2827   } else {
2828     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2829                       pcbddc->adaptive_constraints_ptrs,
2830                       pcbddc->adaptive_constraints_idxs,
2831                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2832     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2833   }
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2839 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2840 {
2841   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2842   PC_IS       *pcis = (PC_IS*)pc->data;
2843   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2844   PetscInt    ierr,i,vertex_size;
2845   PetscViewer viewer=pcbddc->dbg_viewer;
2846 
2847   PetscFunctionBegin;
2848   /* Reset previously computed graph */
2849   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2850   /* Init local Graph struct */
2851   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2852 
2853   /* Check validity of the csr graph passed in by the user */
2854   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2855     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2856   }
2857 
2858   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2859   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2860     Mat       mat_adj;
2861     PetscInt  *xadj,*adjncy;
2862     PetscInt  nvtxs;
2863     PetscBool flg_row=PETSC_TRUE;
2864 
2865     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2866     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2867     if (!flg_row) {
2868       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2869     }
2870     if (pcbddc->use_local_adj) {
2871       ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2872       pcbddc->computed_rowadj = PETSC_TRUE;
2873     } else { /* just compute subdomain's connected components */
2874       IS                     is_dummy;
2875       ISLocalToGlobalMapping l2gmap_dummy;
2876       PetscInt               j,sum;
2877       PetscInt               *cxadj,*cadjncy;
2878       const PetscInt         *idxs;
2879       PCBDDCGraph            graph;
2880       PetscBT                is_on_boundary;
2881 
2882       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2883       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2884       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2885       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2886       ierr = PCBDDCGraphInit(graph,l2gmap_dummy);CHKERRQ(ierr);
2887       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2888       graph->xadj = xadj;
2889       graph->adjncy = adjncy;
2890       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2891       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2892 
2893       if (pcbddc->dbg_flag) {
2894         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains\n",PetscGlobalRank,graph->ncc);CHKERRQ(ierr);
2895         for (i=0;i<graph->ncc;i++) {
2896           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2897         }
2898         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2899       }
2900 
2901       ierr = PetscBTCreate(nvtxs,&is_on_boundary);CHKERRQ(ierr);
2902       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2903       for (i=0;i<pcis->n_B;i++) {
2904         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2905       }
2906       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2907 
2908       ierr = PetscCalloc1(nvtxs+1,&cxadj);CHKERRQ(ierr);
2909       sum = 0;
2910       for (i=0;i<graph->ncc;i++) {
2911         PetscInt sizecc = 0;
2912         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2913           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2914             sizecc++;
2915           }
2916         }
2917         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2918           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2919             cxadj[graph->queue[j]] = sizecc;
2920           }
2921         }
2922         sum += sizecc*sizecc;
2923       }
2924       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2925       sum = 0;
2926       for (i=0;i<nvtxs;i++) {
2927         PetscInt temp = cxadj[i];
2928         cxadj[i] = sum;
2929         sum += temp;
2930       }
2931       cxadj[nvtxs] = sum;
2932       for (i=0;i<graph->ncc;i++) {
2933         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2934           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2935             PetscInt k,sizecc = 0;
2936             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2937               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2938                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2939                 sizecc++;
2940               }
2941             }
2942           }
2943         }
2944       }
2945       if (nvtxs) {
2946         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2947       } else {
2948         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2949         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2950       }
2951       graph->xadj = 0;
2952       graph->adjncy = 0;
2953       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2954       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2955     }
2956     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2957     if (!flg_row) {
2958       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2959     }
2960     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2961   }
2962 
2963   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2964   vertex_size = 1;
2965   if (pcbddc->user_provided_isfordofs) {
2966     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2967       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2968       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2969         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2970         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2971       }
2972       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2973       pcbddc->n_ISForDofs = 0;
2974       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2975     }
2976     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2977     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2978   } else {
2979     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2980       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2981       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2982       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2983         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2984       }
2985     }
2986   }
2987 
2988   /* Setup of Graph */
2989   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2990     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2991   }
2992   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2993     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2994   }
2995   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2996 
2997   /* Graph's connected components analysis */
2998   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2999 
3000   /* print some info to stdout */
3001   if (pcbddc->dbg_flag) {
3002     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
3003   }
3004 
3005   /* mark topography has done */
3006   pcbddc->recompute_topography = PETSC_FALSE;
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 #undef __FUNCT__
3011 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3012 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3013 {
3014   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3015   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3016   PetscErrorCode ierr;
3017 
3018   PetscFunctionBegin;
3019   n = 0;
3020   vertices = 0;
3021   if (pcbddc->ConstraintMatrix) {
3022     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3023     for (i=0;i<local_primal_size;i++) {
3024       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3025       if (size_of_constraint == 1) n++;
3026       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3027     }
3028     if (vertices_idx) {
3029       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3030       n = 0;
3031       for (i=0;i<local_primal_size;i++) {
3032         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3033         if (size_of_constraint == 1) {
3034           vertices[n++]=row_cmat_indices[0];
3035         }
3036         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3037       }
3038     }
3039   }
3040   *n_vertices = n;
3041   if (vertices_idx) *vertices_idx = vertices;
3042   PetscFunctionReturn(0);
3043 }
3044 
3045 #undef __FUNCT__
3046 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3047 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3048 {
3049   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3050   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3051   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3052   PetscBT        touched;
3053   PetscErrorCode ierr;
3054 
3055     /* This function assumes that the number of local constraints per connected component
3056        is not greater than the number of nodes defined for the connected component
3057        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3058   PetscFunctionBegin;
3059   n = 0;
3060   constraints_index = 0;
3061   if (pcbddc->ConstraintMatrix) {
3062     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3063     max_size_of_constraint = 0;
3064     for (i=0;i<local_primal_size;i++) {
3065       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3066       if (size_of_constraint > 1) {
3067         n++;
3068       }
3069       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3070       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3071     }
3072     if (constraints_idx) {
3073       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3074       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3075       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3076       n = 0;
3077       for (i=0;i<local_primal_size;i++) {
3078         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3079         if (size_of_constraint > 1) {
3080           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3081           /* find first untouched local node */
3082           j = 0;
3083           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3084           min_index = row_cmat_global_indices[j];
3085           min_loc = j;
3086           /* search the minimum among nodes not yet touched on the connected component
3087              since there can be more than one constraint on a single cc */
3088           for (j=1;j<size_of_constraint;j++) {
3089             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3090               min_index = row_cmat_global_indices[j];
3091               min_loc = j;
3092             }
3093           }
3094           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3095           constraints_index[n++] = row_cmat_indices[min_loc];
3096         }
3097         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3098       }
3099       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3100       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3101     }
3102   }
3103   *n_constraints = n;
3104   if (constraints_idx) *constraints_idx = constraints_index;
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 #undef __FUNCT__
3109 #define __FUNCT__ "PCBDDCSubsetNumbering"
3110 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3111 {
3112   Vec            local_vec,global_vec;
3113   IS             seqis,paris;
3114   VecScatter     scatter_ctx;
3115   PetscScalar    *array;
3116   PetscInt       *temp_global_dofs;
3117   PetscScalar    globalsum;
3118   PetscInt       i,j,s;
3119   PetscInt       nlocals,first_index,old_index,max_local;
3120   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
3121   PetscMPIInt    *dof_sizes,*dof_displs;
3122   PetscBool      first_found;
3123   PetscErrorCode ierr;
3124 
3125   PetscFunctionBegin;
3126   /* mpi buffers */
3127   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3128   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3129   j = ( !rank_prec_comm ? size_prec_comm : 0);
3130   ierr = PetscMalloc1(j,&dof_sizes);CHKERRQ(ierr);
3131   ierr = PetscMalloc1(j,&dof_displs);CHKERRQ(ierr);
3132   /* get maximum size of subset */
3133   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3134   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3135   max_local = 0;
3136   for (i=0;i<n_local_dofs;i++) {
3137     if (max_local < temp_global_dofs[i] ) {
3138       max_local = temp_global_dofs[i];
3139     }
3140   }
3141   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3142   max_global++;
3143   max_local = 0;
3144   for (i=0;i<n_local_dofs;i++) {
3145     if (max_local < local_dofs[i] ) {
3146       max_local = local_dofs[i];
3147     }
3148   }
3149   max_local++;
3150   /* allocate workspace */
3151   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3152   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3153   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3154   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3155   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3156   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3157   /* create scatter */
3158   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3159   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3160   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3161   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3162   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3163   /* init array */
3164   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3165   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3166   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3167   if (local_dofs_mult) {
3168     for (i=0;i<n_local_dofs;i++) {
3169       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3170     }
3171   } else {
3172     for (i=0;i<n_local_dofs;i++) {
3173       array[local_dofs[i]]=1.0;
3174     }
3175   }
3176   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3177   /* scatter into global vec and get total number of global dofs */
3178   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3179   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3180   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3181   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3182   /* Fill global_vec with cumulative function for global numbering */
3183   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3184   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3185   nlocals = 0;
3186   first_index = -1;
3187   first_found = PETSC_FALSE;
3188   for (i=0;i<s;i++) {
3189     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3190       first_found = PETSC_TRUE;
3191       first_index = i;
3192     }
3193     nlocals += (PetscInt)PetscRealPart(array[i]);
3194   }
3195   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3196   if (!rank_prec_comm) {
3197     dof_displs[0]=0;
3198     for (i=1;i<size_prec_comm;i++) {
3199       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3200     }
3201   }
3202   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3203   if (first_found) {
3204     array[first_index] += (PetscScalar)nlocals;
3205     old_index = first_index;
3206     for (i=first_index+1;i<s;i++) {
3207       if (PetscRealPart(array[i]) > 0.1) {
3208         array[i] += array[old_index];
3209         old_index = i;
3210       }
3211     }
3212   }
3213   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3214   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3215   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3216   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3217   /* get global ordering of local dofs */
3218   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3219   if (local_dofs_mult) {
3220     for (i=0;i<n_local_dofs;i++) {
3221       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3222     }
3223   } else {
3224     for (i=0;i<n_local_dofs;i++) {
3225       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3226     }
3227   }
3228   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3229   /* free workspace */
3230   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3231   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3232   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3233   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
3234   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
3235   /* return pointer to global ordering of local dofs */
3236   *global_numbering_subset = temp_global_dofs;
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 #undef __FUNCT__
3241 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3242 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3243 {
3244   PetscInt       i,j;
3245   PetscScalar    *alphas;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   /* this implements stabilized Gram-Schmidt */
3250   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3251   for (i=0;i<n;i++) {
3252     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3253     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3254     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3255   }
3256   ierr = PetscFree(alphas);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatISGetSubassemblingPattern"
3262 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3263 {
3264   Mat             subdomain_adj;
3265   IS              new_ranks,ranks_send_to;
3266   MatPartitioning partitioner;
3267   Mat_IS          *matis;
3268   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3269   PetscInt        prank;
3270   PetscMPIInt     size,rank,color;
3271   PetscInt        *xadj,*adjncy,*oldranks;
3272   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3273   PetscInt        i,local_size,threshold=0;
3274   PetscErrorCode  ierr;
3275   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3276   PetscSubcomm    subcomm;
3277 
3278   PetscFunctionBegin;
3279   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3280   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3281   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3282 
3283   /* Get info on mapping */
3284   matis = (Mat_IS*)(mat->data);
3285   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3286   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3287 
3288   /* build local CSR graph of subdomains' connectivity */
3289   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3290   xadj[0] = 0;
3291   xadj[1] = PetscMax(n_neighs-1,0);
3292   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3293   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3294 
3295   if (threshold) {
3296     PetscInt xadj_count = 0;
3297     for (i=1;i<n_neighs;i++) {
3298       if (n_shared[i] > threshold) {
3299         adjncy[xadj_count] = neighs[i];
3300         adjncy_wgt[xadj_count] = n_shared[i];
3301         xadj_count++;
3302       }
3303     }
3304     xadj[1] = xadj_count;
3305   } else {
3306     if (xadj[1]) {
3307       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3308       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3309     }
3310   }
3311   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3312   if (use_square) {
3313     for (i=0;i<xadj[1];i++) {
3314       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3315     }
3316   }
3317   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3318 
3319   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3320 
3321   /*
3322     Restrict work on active processes only.
3323   */
3324   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3325   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3326   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3327   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3328   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3329   if (color) {
3330     ierr = PetscFree(xadj);CHKERRQ(ierr);
3331     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3332     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3333   } else {
3334     PetscInt coarsening_ratio;
3335     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
3336     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3337     prank = rank;
3338     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
3339     /*
3340     for (i=0;i<size;i++) {
3341       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3342     }
3343     */
3344     for (i=0;i<xadj[1];i++) {
3345       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3346     }
3347     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3348     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3349     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3350 
3351     /* Partition */
3352     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
3353     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3354     if (use_vwgt) {
3355       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3356       v_wgt[0] = local_size;
3357       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3358     }
3359     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3360     coarsening_ratio = size/n_subdomains;
3361     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3362     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3363     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3364     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3365 
3366     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3367     if (contiguous) {
3368       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3369     } else {
3370       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3371     }
3372     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3373     /* clean up */
3374     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3375     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3376     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3377     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3378   }
3379   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3380 
3381   /* assemble parallel IS for sends */
3382   i = 1;
3383   if (color) i=0;
3384   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3385 
3386   /* get back IS */
3387   *is_sends = ranks_send_to;
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3392 
3393 #undef __FUNCT__
3394 #define __FUNCT__ "MatISSubassemble"
3395 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3396 {
3397   Mat                    local_mat;
3398   Mat_IS                 *matis;
3399   IS                     is_sends_internal;
3400   PetscInt               rows,cols,new_local_rows;
3401   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3402   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3403   ISLocalToGlobalMapping l2gmap;
3404   PetscInt*              l2gmap_indices;
3405   const PetscInt*        is_indices;
3406   MatType                new_local_type;
3407   /* buffers */
3408   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3409   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3410   PetscInt               *recv_buffer_idxs_local;
3411   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3412   /* MPI */
3413   MPI_Comm               comm,comm_n;
3414   PetscSubcomm           subcomm;
3415   PetscMPIInt            n_sends,n_recvs,commsize;
3416   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3417   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3418   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3419   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3420   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3421   PetscErrorCode         ierr;
3422 
3423   PetscFunctionBegin;
3424   /* TODO: add missing checks */
3425   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3426   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3427   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3428   PetscValidLogicalCollectiveInt(mat,nis,7);
3429   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3430   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3431   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3432   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3433   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3434   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3435   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3436   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3437     PetscInt mrows,mcols,mnrows,mncols;
3438     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3439     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3440     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3441     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3442     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3443     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3444   }
3445   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3446   PetscValidLogicalCollectiveInt(mat,bs,0);
3447   /* prepare IS for sending if not provided */
3448   if (!is_sends) {
3449     PetscBool pcontig = PETSC_TRUE;
3450     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3451     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3452   } else {
3453     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3454     is_sends_internal = is_sends;
3455   }
3456 
3457   /* get pointer of MATIS data */
3458   matis = (Mat_IS*)mat->data;
3459 
3460   /* get comm */
3461   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3462 
3463   /* compute number of sends */
3464   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3465   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3466 
3467   /* compute number of receives */
3468   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3469   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3470   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3471   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3472   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3473   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3474   ierr = PetscFree(iflags);CHKERRQ(ierr);
3475 
3476   /* restrict comm if requested */
3477   subcomm = 0;
3478   destroy_mat = PETSC_FALSE;
3479   if (restrict_comm) {
3480     PetscMPIInt color,rank,subcommsize;
3481     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3482     color = 0;
3483     if (n_sends && !n_recvs) color = 1; /* sending only processes will not partecipate in new comm */
3484     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3485     subcommsize = commsize - subcommsize;
3486     /* check if reuse has been requested */
3487     if (reuse == MAT_REUSE_MATRIX) {
3488       if (*mat_n) {
3489         PetscMPIInt subcommsize2;
3490         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3491         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3492         comm_n = PetscObjectComm((PetscObject)*mat_n);
3493       } else {
3494         comm_n = PETSC_COMM_SELF;
3495       }
3496     } else { /* MAT_INITIAL_MATRIX */
3497       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3498       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3499       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3500       comm_n = subcomm->comm;
3501     }
3502     /* flag to destroy *mat_n if not significative */
3503     if (color) destroy_mat = PETSC_TRUE;
3504   } else {
3505     comm_n = comm;
3506   }
3507 
3508   /* prepare send/receive buffers */
3509   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3510   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3511   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3512   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3513   if (nis) {
3514     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3515   }
3516 
3517   /* Get data from local matrices */
3518   if (!isdense) {
3519     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3520     /* TODO: See below some guidelines on how to prepare the local buffers */
3521     /*
3522        send_buffer_vals should contain the raw values of the local matrix
3523        send_buffer_idxs should contain:
3524        - MatType_PRIVATE type
3525        - PetscInt        size_of_l2gmap
3526        - PetscInt        global_row_indices[size_of_l2gmap]
3527        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3528     */
3529   } else {
3530     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3531     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3532     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3533     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3534     send_buffer_idxs[1] = i;
3535     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3536     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3537     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3538     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3539     for (i=0;i<n_sends;i++) {
3540       ilengths_vals[is_indices[i]] = len*len;
3541       ilengths_idxs[is_indices[i]] = len+2;
3542     }
3543   }
3544   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3545   /* additional is (if any) */
3546   if (nis) {
3547     PetscMPIInt psum;
3548     PetscInt j;
3549     for (j=0,psum=0;j<nis;j++) {
3550       PetscInt plen;
3551       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3552       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3553       psum += len+1; /* indices + lenght */
3554     }
3555     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3556     for (j=0,psum=0;j<nis;j++) {
3557       PetscInt plen;
3558       const PetscInt *is_array_idxs;
3559       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3560       send_buffer_idxs_is[psum] = plen;
3561       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3562       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3563       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3564       psum += plen+1; /* indices + lenght */
3565     }
3566     for (i=0;i<n_sends;i++) {
3567       ilengths_idxs_is[is_indices[i]] = psum;
3568     }
3569     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3570   }
3571 
3572   buf_size_idxs = 0;
3573   buf_size_vals = 0;
3574   buf_size_idxs_is = 0;
3575   for (i=0;i<n_recvs;i++) {
3576     buf_size_idxs += (PetscInt)olengths_idxs[i];
3577     buf_size_vals += (PetscInt)olengths_vals[i];
3578     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3579   }
3580   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3581   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3582   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3583 
3584   /* get new tags for clean communications */
3585   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3586   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3587   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3588 
3589   /* allocate for requests */
3590   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3591   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3592   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3593   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3594   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3595   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3596 
3597   /* communications */
3598   ptr_idxs = recv_buffer_idxs;
3599   ptr_vals = recv_buffer_vals;
3600   ptr_idxs_is = recv_buffer_idxs_is;
3601   for (i=0;i<n_recvs;i++) {
3602     source_dest = onodes[i];
3603     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3604     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3605     ptr_idxs += olengths_idxs[i];
3606     ptr_vals += olengths_vals[i];
3607     if (nis) {
3608       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3609       ptr_idxs_is += olengths_idxs_is[i];
3610     }
3611   }
3612   for (i=0;i<n_sends;i++) {
3613     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3614     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3615     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3616     if (nis) {
3617       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3618     }
3619   }
3620   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3621   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3622 
3623   /* assemble new l2g map */
3624   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3625   ptr_idxs = recv_buffer_idxs;
3626   new_local_rows = 0;
3627   for (i=0;i<n_recvs;i++) {
3628     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3629     ptr_idxs += olengths_idxs[i];
3630   }
3631   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3632   ptr_idxs = recv_buffer_idxs;
3633   new_local_rows = 0;
3634   for (i=0;i<n_recvs;i++) {
3635     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3636     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3637     ptr_idxs += olengths_idxs[i];
3638   }
3639   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3640   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3641   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3642 
3643   /* infer new local matrix type from received local matrices type */
3644   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3645   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3646   if (n_recvs) {
3647     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3648     ptr_idxs = recv_buffer_idxs;
3649     for (i=0;i<n_recvs;i++) {
3650       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3651         new_local_type_private = MATAIJ_PRIVATE;
3652         break;
3653       }
3654       ptr_idxs += olengths_idxs[i];
3655     }
3656     switch (new_local_type_private) {
3657       case MATDENSE_PRIVATE:
3658         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3659           new_local_type = MATSEQAIJ;
3660           bs = 1;
3661         } else { /* if I receive only 1 dense matrix */
3662           new_local_type = MATSEQDENSE;
3663           bs = 1;
3664         }
3665         break;
3666       case MATAIJ_PRIVATE:
3667         new_local_type = MATSEQAIJ;
3668         bs = 1;
3669         break;
3670       case MATBAIJ_PRIVATE:
3671         new_local_type = MATSEQBAIJ;
3672         break;
3673       case MATSBAIJ_PRIVATE:
3674         new_local_type = MATSEQSBAIJ;
3675         break;
3676       default:
3677         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3678         break;
3679     }
3680   } else { /* by default, new_local_type is seqdense */
3681     new_local_type = MATSEQDENSE;
3682     bs = 1;
3683   }
3684 
3685   /* create MATIS object if needed */
3686   if (reuse == MAT_INITIAL_MATRIX) {
3687     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3688     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3689   } else {
3690     /* it also destroys the local matrices */
3691     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3692   }
3693   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3694   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3695 
3696   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3697 
3698   /* Global to local map of received indices */
3699   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3700   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3701   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3702 
3703   /* restore attributes -> type of incoming data and its size */
3704   buf_size_idxs = 0;
3705   for (i=0;i<n_recvs;i++) {
3706     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3707     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3708     buf_size_idxs += (PetscInt)olengths_idxs[i];
3709   }
3710   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3711 
3712   /* set preallocation */
3713   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3714   if (!newisdense) {
3715     PetscInt *new_local_nnz=0;
3716 
3717     ptr_vals = recv_buffer_vals;
3718     ptr_idxs = recv_buffer_idxs_local;
3719     if (n_recvs) {
3720       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3721     }
3722     for (i=0;i<n_recvs;i++) {
3723       PetscInt j;
3724       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3725         for (j=0;j<*(ptr_idxs+1);j++) {
3726           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3727         }
3728       } else {
3729         /* TODO */
3730       }
3731       ptr_idxs += olengths_idxs[i];
3732     }
3733     if (new_local_nnz) {
3734       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3735       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3736       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3737       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3738       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3739       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3740     } else {
3741       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3742     }
3743     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3744   } else {
3745     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3746   }
3747 
3748   /* set values */
3749   ptr_vals = recv_buffer_vals;
3750   ptr_idxs = recv_buffer_idxs_local;
3751   for (i=0;i<n_recvs;i++) {
3752     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3753       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3754       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3755       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3756       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3757       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3758     } else {
3759       /* TODO */
3760     }
3761     ptr_idxs += olengths_idxs[i];
3762     ptr_vals += olengths_vals[i];
3763   }
3764   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3768   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3769   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3770 
3771 #if 0
3772   if (!restrict_comm) { /* check */
3773     Vec       lvec,rvec;
3774     PetscReal infty_error;
3775 
3776     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3777     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3778     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3779     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3780     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3781     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3782     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3783     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3784     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3785   }
3786 #endif
3787 
3788   /* assemble new additional is (if any) */
3789   if (nis) {
3790     PetscInt **temp_idxs,*count_is,j,psum;
3791 
3792     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3793     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3794     ptr_idxs = recv_buffer_idxs_is;
3795     psum = 0;
3796     for (i=0;i<n_recvs;i++) {
3797       for (j=0;j<nis;j++) {
3798         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3799         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3800         psum += plen;
3801         ptr_idxs += plen+1; /* shift pointer to received data */
3802       }
3803     }
3804     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3805     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3806     for (i=1;i<nis;i++) {
3807       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3808     }
3809     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3810     ptr_idxs = recv_buffer_idxs_is;
3811     for (i=0;i<n_recvs;i++) {
3812       for (j=0;j<nis;j++) {
3813         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3814         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3815         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3816         ptr_idxs += plen+1; /* shift pointer to received data */
3817       }
3818     }
3819     for (i=0;i<nis;i++) {
3820       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3821       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3822       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3823     }
3824     ierr = PetscFree(count_is);CHKERRQ(ierr);
3825     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3826     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3827   }
3828   /* free workspace */
3829   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3830   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3831   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3832   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3833   if (isdense) {
3834     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3835     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3836   } else {
3837     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3838   }
3839   if (nis) {
3840     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3841     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3842   }
3843   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3844   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3845   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3846   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3847   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3848   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3849   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3850   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3851   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3852   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3853   ierr = PetscFree(onodes);CHKERRQ(ierr);
3854   if (nis) {
3855     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3856     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3857     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3858   }
3859   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3860   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3861     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3862     for (i=0;i<nis;i++) {
3863       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3864     }
3865   }
3866   PetscFunctionReturn(0);
3867 }
3868 
3869 /* temporary hack into ksp private data structure */
3870 #include <petsc-private/kspimpl.h>
3871 
3872 #undef __FUNCT__
3873 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3874 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3875 {
3876   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3877   PC_IS                  *pcis = (PC_IS*)pc->data;
3878   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3879   MatNullSpace           CoarseNullSpace=NULL;
3880   ISLocalToGlobalMapping coarse_islg;
3881   IS                     coarse_is,*isarray;
3882   PetscInt               i,im_active=-1,active_procs=-1;
3883   PetscInt               nis,nisdofs,nisneu;
3884   PC                     pc_temp;
3885   PCType                 coarse_pc_type;
3886   KSPType                coarse_ksp_type;
3887   PetscBool              multilevel_requested,multilevel_allowed;
3888   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3889   Mat                    t_coarse_mat_is;
3890   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3891   PetscMPIInt            all_procs;
3892   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3893   PetscBool              compute_vecs = PETSC_FALSE;
3894   PetscScalar            *array;
3895   PetscErrorCode         ierr;
3896 
3897   PetscFunctionBegin;
3898   /* Assign global numbering to coarse dofs */
3899   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3900     compute_vecs = PETSC_TRUE;
3901     PetscInt ocoarse_size;
3902     ocoarse_size = pcbddc->coarse_size;
3903     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3904     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3905     /* see if we can avoid some work */
3906     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3907       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3908         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3909         coarse_reuse = PETSC_FALSE;
3910       } else { /* we can safely reuse already computed coarse matrix */
3911         coarse_reuse = PETSC_TRUE;
3912       }
3913     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3914       coarse_reuse = PETSC_FALSE;
3915     }
3916     /* reset any subassembling information */
3917     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3918     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3919   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3920     coarse_reuse = PETSC_TRUE;
3921   }
3922 
3923   /* count "active" (i.e. with positive local size) and "void" processes */
3924   im_active = !!(pcis->n);
3925   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3926   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3927   void_procs = all_procs-active_procs;
3928   csin_type_simple = PETSC_TRUE;
3929   redist = PETSC_FALSE;
3930   if (pcbddc->current_level && void_procs) {
3931     csin_ml = PETSC_TRUE;
3932     ncoarse_ml = void_procs;
3933     csin_ds = PETSC_TRUE;
3934     ncoarse_ds = void_procs;
3935   } else {
3936     csin_ml = PETSC_FALSE;
3937     ncoarse_ml = all_procs;
3938     if (void_procs) {
3939       csin_ds = PETSC_TRUE;
3940       ncoarse_ds = void_procs;
3941       csin_type_simple = PETSC_FALSE;
3942     } else {
3943       if (pcbddc->redistribute_coarse && pcbddc->redistribute_coarse < all_procs) {
3944         csin_ds = PETSC_TRUE;
3945         ncoarse_ds = pcbddc->redistribute_coarse;
3946         redist = PETSC_TRUE;
3947       } else {
3948         csin_ds = PETSC_FALSE;
3949         ncoarse_ds = all_procs;
3950       }
3951     }
3952   }
3953 
3954   /*
3955     test if we can go multilevel: three conditions must be satisfied:
3956     - we have not exceeded the number of levels requested
3957     - we can actually subassemble the active processes
3958     - we can find a suitable number of MPI processes where we can place the subassembled problem
3959   */
3960   multilevel_allowed = PETSC_FALSE;
3961   multilevel_requested = PETSC_FALSE;
3962   if (pcbddc->current_level < pcbddc->max_levels) {
3963     multilevel_requested = PETSC_TRUE;
3964     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
3965       multilevel_allowed = PETSC_FALSE;
3966     } else {
3967       multilevel_allowed = PETSC_TRUE;
3968     }
3969   }
3970   /* determine number of process partecipating to coarse solver */
3971   if (multilevel_allowed) {
3972     ncoarse = ncoarse_ml;
3973     csin = csin_ml;
3974   } else {
3975     ncoarse = ncoarse_ds;
3976     csin = csin_ds;
3977   }
3978 
3979   /* creates temporary l2gmap and IS for coarse indexes */
3980   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3981   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3982 
3983   /* creates temporary MATIS object for coarse matrix */
3984   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
3985   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3986   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
3987   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
3988 #if 0
3989   {
3990     PetscViewer viewer;
3991     char filename[256];
3992     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
3993     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
3994     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
3995     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
3996     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
3997   }
3998 #endif
3999   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4000   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4001   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4002   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4003   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4004 
4005   /* compute dofs splitting and neumann boundaries for coarse dofs */
4006   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4007     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4008     const PetscInt         *idxs;
4009     ISLocalToGlobalMapping tmap;
4010 
4011     /* create map between primal indices (in local representative ordering) and local primal numbering */
4012     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4013     /* allocate space for temporary storage */
4014     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4015     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4016     /* allocate for IS array */
4017     nisdofs = pcbddc->n_ISForDofsLocal;
4018     nisneu = !!pcbddc->NeumannBoundariesLocal;
4019     nis = nisdofs + nisneu;
4020     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4021     /* dofs splitting */
4022     for (i=0;i<nisdofs;i++) {
4023       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4024       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4025       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4026       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4027       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4028       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4029       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4030       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4031     }
4032     /* neumann boundaries */
4033     if (pcbddc->NeumannBoundariesLocal) {
4034       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4035       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4036       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4037       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4038       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4039       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4040       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4041       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4042     }
4043     /* free memory */
4044     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4045     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4046     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4047   } else {
4048     nis = 0;
4049     nisdofs = 0;
4050     nisneu = 0;
4051     isarray = NULL;
4052   }
4053   /* destroy no longer needed map */
4054   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4055 
4056   /* restrict on coarse candidates (if needed) */
4057   coarse_mat_is = NULL;
4058   if (csin) {
4059     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4060       if (redist) {
4061         PetscMPIInt rank;
4062         PetscInt spc,n_spc_p1,dest[1];
4063 
4064         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4065         spc = all_procs/pcbddc->redistribute_coarse;
4066         n_spc_p1 = all_procs%pcbddc->redistribute_coarse;
4067         if (rank > n_spc_p1*(spc+1)-1) {
4068           dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4069         } else {
4070           dest[0] = rank/(spc+1);
4071         }
4072         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),1,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4073       } else {
4074         PetscInt j,tissize,*nisindices;
4075         PetscInt *coarse_candidates;
4076         const PetscInt* tisindices;
4077         /* get coarse candidates' ranks in pc communicator */
4078         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4079         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4080         for (i=0,j=0;i<all_procs;i++) {
4081           if (!coarse_candidates[i]) {
4082             coarse_candidates[j]=i;
4083             j++;
4084           }
4085         }
4086         if (j < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",j,ncoarse);
4087         /* get a suitable subassembling pattern */
4088         if (csin_type_simple) {
4089           PetscMPIInt rank;
4090           PetscInt    issize,isidx;
4091           ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4092           if (im_active) {
4093             issize = 1;
4094             isidx = (PetscInt)rank;
4095           } else {
4096             issize = 0;
4097             isidx = -1;
4098           }
4099           ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4100         } else {
4101           ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4102         }
4103         if (pcbddc->dbg_flag) {
4104           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4105           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4106           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4107           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4108           for (i=0;i<j;i++) {
4109             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4110           }
4111           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4112           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4113         }
4114         /* shift the pattern on coarse candidates */
4115         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4116         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4117         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4118         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4119         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4120         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4121         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4122       }
4123     }
4124     if (pcbddc->dbg_flag) {
4125       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4126       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4127       ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4128       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4129     }
4130     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4131     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4132   } else {
4133     if (pcbddc->dbg_flag) {
4134       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4135       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4136       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4137     }
4138     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4139     coarse_mat_is = t_coarse_mat_is;
4140   }
4141 
4142   /* create local to global scatters for coarse problem */
4143   if (compute_vecs) {
4144     PetscInt lrows;
4145     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4146     if (coarse_mat_is) {
4147       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4148     } else {
4149       lrows = 0;
4150     }
4151     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4152     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4153     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4154     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4155     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4156   }
4157   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4158   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4159 
4160   /* set defaults for coarse KSP and PC */
4161   if (multilevel_allowed) {
4162     coarse_ksp_type = KSPRICHARDSON;
4163     coarse_pc_type = PCBDDC;
4164   } else {
4165     coarse_ksp_type = KSPPREONLY;
4166     coarse_pc_type = PCREDUNDANT;
4167   }
4168 
4169   /* print some info if requested */
4170   if (pcbddc->dbg_flag) {
4171     if (!multilevel_allowed) {
4172       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4173       if (multilevel_requested) {
4174         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4175       } else if (pcbddc->max_levels) {
4176         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4177       }
4178       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4179     }
4180   }
4181 
4182   /* create the coarse KSP object only once with defaults */
4183   if (coarse_mat_is) {
4184     MatReuse coarse_mat_reuse;
4185     PetscViewer dbg_viewer = NULL;
4186     if (pcbddc->dbg_flag) {
4187       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4188       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4189     }
4190     if (!pcbddc->coarse_ksp) {
4191       char prefix[256],str_level[16];
4192       size_t len;
4193       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4194       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4195       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4196       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4197       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4198       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4199       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4200       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4201       /* prefix */
4202       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4203       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4204       if (!pcbddc->current_level) {
4205         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4206         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4207       } else {
4208         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4209         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4210         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4211         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4212         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4213         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4214       }
4215       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4216       /* allow user customization */
4217       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4218       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4219     }
4220 
4221     /* get some info after set from options */
4222     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4223     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4224     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4225     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4226     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4227       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4228       isbddc = PETSC_FALSE;
4229     }
4230     if (isredundant) {
4231       KSP inner_ksp;
4232       PC inner_pc;
4233       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4234       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4235       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4236     }
4237 
4238     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4239     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4240     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4241     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4242     if (nisdofs) {
4243       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4244       for (i=0;i<nisdofs;i++) {
4245         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4246       }
4247     }
4248     if (nisneu) {
4249       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4250       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4251     }
4252 
4253     /* assemble coarse matrix */
4254     if (coarse_reuse) {
4255       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4256       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4257       coarse_mat_reuse = MAT_REUSE_MATRIX;
4258     } else {
4259       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4260     }
4261     if (isbddc || isnn) {
4262       if (pcbddc->coarsening_ratio > 1) {
4263         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4264           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4265           if (pcbddc->dbg_flag) {
4266             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4267             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4268             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4269             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4270           }
4271         }
4272         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4273       } else {
4274         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4275         coarse_mat = coarse_mat_is;
4276       }
4277     } else {
4278       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4279     }
4280     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4281 
4282     /* propagate symmetry info to coarse matrix */
4283     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4284     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4285 
4286     /* set operators */
4287     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4288     if (pcbddc->dbg_flag) {
4289       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4290     }
4291   } else { /* processes non partecipating to coarse solver (if any) */
4292     coarse_mat = 0;
4293   }
4294   ierr = PetscFree(isarray);CHKERRQ(ierr);
4295 #if 0
4296   {
4297     PetscViewer viewer;
4298     char filename[256];
4299     sprintf(filename,"coarse_mat.m");
4300     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4301     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4302     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4303     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4304   }
4305 #endif
4306 
4307   /* Compute coarse null space (special handling by BDDC only) */
4308   if (pcbddc->NullSpace) {
4309     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4310   }
4311 
4312   if (pcbddc->coarse_ksp) {
4313     Vec crhs,csol;
4314     PetscBool ispreonly;
4315     if (CoarseNullSpace) {
4316       if (isbddc) {
4317         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4318       } else {
4319         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4320       }
4321     }
4322     /* setup coarse ksp */
4323     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4324     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4325     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4326     /* hack */
4327     if (!csol) {
4328       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4329     }
4330     if (!crhs) {
4331       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4332     }
4333     /* Check coarse problem if in debug mode or if solving with an iterative method */
4334     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4335     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4336       KSP       check_ksp;
4337       KSPType   check_ksp_type;
4338       PC        check_pc;
4339       Vec       check_vec,coarse_vec;
4340       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4341       PetscInt  its;
4342       PetscBool compute_eigs;
4343       PetscReal *eigs_r,*eigs_c;
4344       PetscInt  neigs;
4345       const char *prefix;
4346 
4347       /* Create ksp object suitable for estimation of extreme eigenvalues */
4348       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4349       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4350       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4351       if (ispreonly) {
4352         check_ksp_type = KSPPREONLY;
4353         compute_eigs = PETSC_FALSE;
4354       } else {
4355         check_ksp_type = KSPGMRES;
4356         compute_eigs = PETSC_TRUE;
4357       }
4358       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4359       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4360       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4361       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4362       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4363       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4364       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4365       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4366       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4367       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4368       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4369       /* create random vec */
4370       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4371       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4372       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4373       if (CoarseNullSpace) {
4374         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4375       }
4376       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4377       /* solve coarse problem */
4378       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4379       if (CoarseNullSpace) {
4380         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4381       }
4382       /* set eigenvalue estimation if preonly has not been requested */
4383       if (compute_eigs) {
4384         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4385         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4386         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4387         lambda_max = eigs_r[neigs-1];
4388         lambda_min = eigs_r[0];
4389         if (pcbddc->use_coarse_estimates) {
4390           if (lambda_max>lambda_min) {
4391             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4392             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4393           }
4394         }
4395       }
4396 
4397       /* check coarse problem residual error */
4398       if (pcbddc->dbg_flag) {
4399         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4400         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4401         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4402         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4403         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4404         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4405         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4406         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (%d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4407         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4408         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4409         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4410         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4411         if (compute_eigs) {
4412           PetscReal lambda_max_s,lambda_min_s;
4413           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4414           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4415           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4416           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4417           for (i=0;i<neigs;i++) {
4418             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4419           }
4420         }
4421         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4422         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4423       }
4424       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4425       if (compute_eigs) {
4426         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4427         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4428       }
4429     }
4430   }
4431   /* print additional info */
4432   if (pcbddc->dbg_flag) {
4433     /* waits until all processes reaches this point */
4434     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4435     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4436     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4437   }
4438 
4439   /* free memory */
4440   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4441   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4442   PetscFunctionReturn(0);
4443 }
4444 
4445 #undef __FUNCT__
4446 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4447 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4448 {
4449   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4450   PC_IS*         pcis = (PC_IS*)pc->data;
4451   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4452   PetscInt       i,coarse_size;
4453   PetscInt       *local_primal_indices;
4454   PetscErrorCode ierr;
4455 
4456   PetscFunctionBegin;
4457   /* Compute global number of coarse dofs */
4458   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4459     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4460   }
4461   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4462 
4463   /* check numbering */
4464   if (pcbddc->dbg_flag) {
4465     PetscScalar coarsesum,*array;
4466     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4467 
4468     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4469     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4470     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4471     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4472     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4473     for (i=0;i<pcbddc->local_primal_size;i++) {
4474       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4475     }
4476     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4477     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4478     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4479     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4480     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4481     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4482     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4483     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4484     for (i=0;i<pcis->n;i++) {
4485       if (array[i] == 1.0) {
4486         set_error = PETSC_TRUE;
4487         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4488       }
4489     }
4490     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4491     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4492     for (i=0;i<pcis->n;i++) {
4493       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4494     }
4495     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4496     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4497     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4498     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4499     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4500     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4501     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4502       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4503       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4504       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4505       for (i=0;i<pcbddc->local_primal_size;i++) {
4506         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
4507       }
4508       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4509     }
4510     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4511     if (set_error_reduced) {
4512       SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4513     }
4514   }
4515   /* get back data */
4516   *coarse_size_n = coarse_size;
4517   *local_primal_indices_n = local_primal_indices;
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 #undef __FUNCT__
4522 #define __FUNCT__ "PCBDDCGlobalToLocal"
4523 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4524 {
4525   IS             localis_t;
4526   PetscInt       i,lsize,*idxs,n;
4527   PetscScalar    *vals;
4528   PetscErrorCode ierr;
4529 
4530   PetscFunctionBegin;
4531   /* get indices in local ordering exploiting local to global map */
4532   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4533   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4534   for (i=0;i<lsize;i++) vals[i] = 1.0;
4535   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4536   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4537   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4538   if (idxs) { /* multilevel guard */
4539     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4540   }
4541   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4542   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4543   ierr = PetscFree(vals);CHKERRQ(ierr);
4544   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4545   /* now compute set in local ordering */
4546   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4547   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4548   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4549   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4550   for (i=0,lsize=0;i<n;i++) {
4551     if (PetscRealPart(vals[i]) > 0.5) {
4552       lsize++;
4553     }
4554   }
4555   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4556   for (i=0,lsize=0;i<n;i++) {
4557     if (PetscRealPart(vals[i]) > 0.5) {
4558       idxs[lsize++] = i;
4559     }
4560   }
4561   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4562   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4563   *localis = localis_t;
4564   PetscFunctionReturn(0);
4565 }
4566 
4567 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4568 #undef __FUNCT__
4569 #define __FUNCT__ "PCBDDCMatMult_Private"
4570 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4571 {
4572   PCBDDCChange_ctx change_ctx;
4573   PetscErrorCode   ierr;
4574 
4575   PetscFunctionBegin;
4576   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4577   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4578   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4579   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4580   PetscFunctionReturn(0);
4581 }
4582 
4583 #undef __FUNCT__
4584 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4585 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4586 {
4587   PCBDDCChange_ctx change_ctx;
4588   PetscErrorCode   ierr;
4589 
4590   PetscFunctionBegin;
4591   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4592   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4593   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4594   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 #undef __FUNCT__
4599 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4600 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4601 {
4602   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4603   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4604   PetscInt            *used_xadj,*used_adjncy;
4605   PetscBool           free_used_adj;
4606   PetscErrorCode      ierr;
4607 
4608   PetscFunctionBegin;
4609   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4610   free_used_adj = PETSC_FALSE;
4611   if (pcbddc->sub_schurs_layers == -1) {
4612     used_xadj = NULL;
4613     used_adjncy = NULL;
4614   } else {
4615     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4616       used_xadj = pcbddc->mat_graph->xadj;
4617       used_adjncy = pcbddc->mat_graph->adjncy;
4618     } else if (pcbddc->computed_rowadj) {
4619       used_xadj = pcbddc->mat_graph->xadj;
4620       used_adjncy = pcbddc->mat_graph->adjncy;
4621     } else {
4622       Mat            mat_adj;
4623       PetscBool      flg_row=PETSC_TRUE;
4624       const PetscInt *xadj,*adjncy;
4625       PetscInt       nvtxs;
4626 
4627       ierr = MatConvert(pcbddc->local_mat,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
4628       ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4629       if (!flg_row) {
4630         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
4631       }
4632       ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4633       ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4634       ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4635       ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4636       if (!flg_row) {
4637         SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
4638       }
4639       ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
4640       free_used_adj = PETSC_TRUE;
4641     }
4642   }
4643   ierr = PCBDDCSubSchursSetUp(sub_schurs,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->adaptive_selection,pcbddc->use_deluxe_scaling,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4644 
4645   /* free adjacency */
4646   if (free_used_adj) {
4647     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4648   }
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "PCBDDCInitSubSchurs"
4654 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4655 {
4656   PC_IS               *pcis=(PC_IS*)pc->data;
4657   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4658   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4659   PCBDDCGraph         graph;
4660   Mat                 S_j;
4661   PetscErrorCode      ierr;
4662 
4663   PetscFunctionBegin;
4664   /* attach interface graph for determining subsets */
4665   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4666     IS verticesIS;
4667 
4668     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4669     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4670     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap);CHKERRQ(ierr);
4671     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4672     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4673     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4674 /*
4675     if (pcbddc->dbg_flag) {
4676       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4677     }
4678 */
4679   } else {
4680     graph = pcbddc->mat_graph;
4681   }
4682 
4683   /* Create Schur complement matrix */
4684   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4685   ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4686 
4687   /* sub_schurs init */
4688   ierr = PCBDDCSubSchursInit(sub_schurs,pcbddc->local_mat,S_j,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_threshold);CHKERRQ(ierr);
4689   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4690   /* free graph struct */
4691   if (pcbddc->sub_schurs_rebuild) {
4692     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4693   }
4694   PetscFunctionReturn(0);
4695 }
4696