xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision a64f4aa4610dd4ec03fac46179ae53980bc33f9b)
1 #include <../src/ksp/pc/impls/bddc/bddc.h>
2 #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3 #include <petscblaslapack.h>
4 
5 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y);
6 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y);
7 
8 #undef __FUNCT__
9 #define __FUNCT__ "PCBDDCAdaptiveSelection"
10 PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
11 {
12   PC_BDDC*        pcbddc = (PC_BDDC*)pc->data;
13   PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
14   PetscBLASInt    B_dummyint,B_neigs,B_ierr,B_lwork;
15   PetscBLASInt    *B_iwork,*B_ifail;
16   PetscScalar     *work,lwork;
17   PetscScalar     *St,*S,*eigv;
18   PetscScalar     *Sarray,*Starray;
19   PetscReal       *eigs,thresh;
20   PetscInt        i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
21   PetscBool       allocated_S_St;
22 #if defined(PETSC_USE_COMPLEX)
23   PetscReal       *rwork;
24 #endif
25   PetscErrorCode  ierr;
26 
27   PetscFunctionBegin;
28   if (!sub_schurs->use_mumps) {
29     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS");
30   }
31 
32   if (pcbddc->dbg_flag) {
33     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
34     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
35     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr);
36     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
37   }
38 
39   if (pcbddc->dbg_flag) {
40     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
41   }
42 
43   if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) {
44     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\n",sub_schurs->is_hermitian,sub_schurs->is_posdef);
45   }
46 
47   /* max size of subsets */
48   mss = 0;
49   for (i=0;i<sub_schurs->n_subs;i++) {
50     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
51       PetscInt subset_size;
52       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
53       mss = PetscMax(mss,subset_size);
54     }
55   }
56 
57   /* min/max and threshold */
58   nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
59   nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
60   nmax = PetscMax(nmin,nmax);
61   allocated_S_St = PETSC_FALSE;
62   if (nmin) {
63     allocated_S_St = PETSC_TRUE;
64   }
65 
66   /* allocate lapack workspace */
67   cum = cum2 = 0;
68   maxneigs = 0;
69   for (i=0;i<sub_schurs->n_subs;i++) {
70     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
71       PetscInt n,subset_size;
72 
73       ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
74       n = PetscMin(subset_size,nmax);
75       cum += subset_size*n;
76       cum2 += n;
77       maxneigs = PetscMax(maxneigs,n);
78     }
79   }
80   if (mss) {
81     if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
82       PetscBLASInt B_itype = 1;
83       PetscBLASInt B_N = mss;
84       PetscReal    zero = 0.0;
85       PetscReal    eps = 0.0; /* dlamch? */
86 
87       B_lwork = -1;
88       S = NULL;
89       St = NULL;
90       eigs = NULL;
91       eigv = NULL;
92       B_iwork = NULL;
93       B_ifail = NULL;
94       thresh = 1.0;
95       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
96 #if defined(PETSC_USE_COMPLEX)
97       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
98 #else
99       PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
100 #endif
101       if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
102       ierr = PetscFPTrapPop();CHKERRQ(ierr);
103     } else {
104         /* TODO */
105     }
106   } else {
107     lwork = 0;
108   }
109 
110   nv = 0;
111   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) { /* complement of subsets, each entry is a vertex */
112     ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&nv);CHKERRQ(ierr);
113   }
114   ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr);
115   if (allocated_S_St) {
116     ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr);
117   }
118   ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr);
119 #if defined(PETSC_USE_COMPLEX)
120   ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr);
121 #endif
122   ierr = PetscMalloc4(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
123                       nv+cum2+1,&pcbddc->adaptive_constraints_ptrs,
124                       nv+cum,&pcbddc->adaptive_constraints_idxs,
125                       nv+cum,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
126   ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr);
127 
128   maxneigs = 0;
129   cum = cum2 = cumarray = 0;
130   if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
131     const PetscInt *idxs;
132 
133     ierr = ISGetIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
134     for (cum=0;cum<nv;cum++) {
135       pcbddc->adaptive_constraints_n[cum] = 1;
136       pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
137       pcbddc->adaptive_constraints_ptrs[cum] = cum;
138       pcbddc->adaptive_constraints_data[cum] = 1.0;
139     }
140     cum2 = cum;
141     ierr = ISRestoreIndices(sub_schurs->is_Ej_com,&idxs);CHKERRQ(ierr);
142   }
143 
144   if (mss) { /* multilevel */
145     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
146     ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
147   }
148 
149   for (i=0;i<sub_schurs->n_subs;i++) {
150     PetscInt j,subset_size;
151 
152     ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr);
153     if (PetscBTLookup(sub_schurs->computed_Stilda_subs,i)) {
154       const PetscInt *idxs;
155       PetscReal      infty = PETSC_MAX_REAL;
156       PetscInt       eigs_start = 0;
157       PetscBLASInt   B_N;
158 
159       ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr);
160       if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
161         if (sub_schurs->is_hermitian) {
162           PetscInt j;
163           for (j=0;j<subset_size;j++) {
164             ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
165           }
166           for (j=0;j<subset_size;j++) {
167             ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
168           }
169         } else {
170           ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
171           ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
172         }
173       } else {
174         S = Sarray + cumarray;
175         St = Starray + cumarray;
176       }
177 
178       /* Threshold: this is an heuristic for edges */
179       ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
180       thresh = pcbddc->mat_graph->count[idxs[0]]*pcbddc->adaptive_threshold;
181 
182       if (sub_schurs->is_hermitian && sub_schurs->is_posdef) {
183         PetscBLASInt B_itype = 1;
184         PetscBLASInt B_IL, B_IU;
185         PetscReal    eps = -1.0; /* dlamch? */
186         PetscInt     nmin_s;
187 
188         /* ask for eigenvalues larger than thresh */
189         if (pcbddc->dbg_flag) {
190           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d %d %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]);
191         }
192         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
193 #if defined(PETSC_USE_COMPLEX)
194         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
195 #else
196         PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
197 #endif
198         ierr = PetscFPTrapPop();CHKERRQ(ierr);
199         if (B_ierr) {
200           if (B_ierr < 0 ) {
201             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
202           } else if (B_ierr <= B_N) {
203             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
204           } else {
205             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
206           }
207         }
208 
209         if (B_neigs > nmax) {
210           if (pcbddc->dbg_flag) {
211             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, more than maximum required %d.\n",B_neigs,nmax);
212           }
213           eigs_start = B_neigs -nmax;
214           B_neigs = nmax;
215         }
216 
217         nmin_s = PetscMin(nmin,B_N);
218         if (B_neigs < nmin_s) {
219           PetscBLASInt B_neigs2;
220 
221           B_IU = B_N - B_neigs;
222           B_IL = B_N - nmin_s + 1;
223           if (pcbddc->dbg_flag) {
224             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
225           }
226           if (sub_schurs->is_hermitian) {
227             PetscInt j;
228             for (j=0;j<subset_size;j++) {
229               ierr = PetscMemcpy(S+j*(subset_size+1),Sarray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
230             }
231             for (j=0;j<subset_size;j++) {
232               ierr = PetscMemcpy(St+j*(subset_size+1),Starray+cumarray+j*(subset_size+1),(subset_size-j)*sizeof(PetscScalar));CHKERRQ(ierr);
233             }
234           } else {
235             ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
236             ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
237           }
238           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
239 #if defined(PETSC_USE_COMPLEX)
240           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
241 #else
242           PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&thresh,&infty,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
243 #endif
244           ierr = PetscFPTrapPop();CHKERRQ(ierr);
245           B_neigs += B_neigs2;
246         }
247         if (B_ierr) {
248           if (B_ierr < 0 ) {
249             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
250           } else if (B_ierr <= B_N) {
251             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
252           } else {
253             SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
254           }
255         }
256         if (pcbddc->dbg_flag) {
257           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Got %d eigs\n",B_neigs);
258           for (j=0;j<B_neigs;j++) {
259             if (eigs[j] == 0.0) {
260               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     Inf\n");
261             } else {
262               PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"     %1.6e\n",eigs[j+eigs_start]);
263             }
264           }
265         }
266       } else {
267           /* TODO */
268       }
269 
270       maxneigs = PetscMax(B_neigs,maxneigs);
271       pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
272       ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+cum2,eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr);
273 
274       if (pcbddc->dbg_flag > 1) {
275         PetscInt ii;
276         for (ii=0;ii<B_neigs;ii++) {
277           PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"   -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
278           for (j=0;j<B_N;j++) {
279             PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"       %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+cum2]);
280           }
281         }
282       }
283       for (j=0;j<B_neigs;j++) {
284 #if 0
285         {
286           PetscBLASInt Blas_N,Blas_one = 1.0;
287           PetscScalar norm;
288           ierr = PetscBLASIntCast(subset_size,&Blas_N);CHKERRQ(ierr);
289           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,pcbddc->adaptive_constraints_data+cum2,&Blas_one,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
290           if (pcbddc->adaptive_constraints_data[cum2] > 0.0) {
291             norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
292           } else {
293             norm = -1.0/PetscSqrtReal(PetscRealPart(norm));
294           }
295           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,pcbddc->adaptive_constraints_data+cum2,&Blas_one));
296         }
297 #endif
298         ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+cum2,idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr);
299         pcbddc->adaptive_constraints_ptrs[cum++] = cum2;
300         cum2 += subset_size;
301       }
302       ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr);
303     }
304     /* shift for next computation */
305     cumarray += subset_size*subset_size;
306   }
307   if (pcbddc->dbg_flag) {
308     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
309   }
310   pcbddc->adaptive_constraints_ptrs[cum] = cum2;
311 
312   if (mss) {
313     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr);
314     ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr);
315     /* destroy matrices (junk) */
316     ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr);
317     ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr);
318   }
319   if (allocated_S_St) {
320     ierr = PetscFree2(S,St);CHKERRQ(ierr);
321   }
322   ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr);
323 #if defined(PETSC_USE_COMPLEX)
324   ierr = PetscFree(rwork);CHKERRQ(ierr);
325 #endif
326   if (pcbddc->dbg_flag) {
327     PetscInt maxneigs_r;
328     ierr = MPI_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
329     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr);
330   }
331   PetscFunctionReturn(0);
332 }
333 
334 #undef __FUNCT__
335 #define __FUNCT__ "PCBDDCSetUpSolvers"
336 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
337 {
338   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
339   PetscScalar    *coarse_submat_vals;
340   PetscErrorCode ierr;
341 
342   PetscFunctionBegin;
343   /* Setup local scatters R_to_B and (optionally) R_to_D */
344   /* PCBDDCSetUpLocalWorkVectors should be called first! */
345   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
346 
347   /* Setup local neumann solver ksp_R */
348   /* PCBDDCSetUpLocalScatters should be called first! */
349   ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr);
350 
351   /* Change global null space passed in by the user if change of basis has been requested */
352   if (pcbddc->NullSpace && pcbddc->ChangeOfBasisMatrix) {
353     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
354   }
355 
356   /*
357      Setup local correction and local part of coarse basis.
358      Gives back the dense local part of the coarse matrix in column major ordering
359   */
360   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
361 
362   /* Compute total number of coarse nodes and setup coarse solver */
363   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
364 
365   /* free */
366   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "PCBDDCResetCustomization"
372 PetscErrorCode PCBDDCResetCustomization(PC pc)
373 {
374   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
375   PetscErrorCode ierr;
376 
377   PetscFunctionBegin;
378   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
379   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
380   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
381   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
382   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
383   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
384   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
385   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
386   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
387   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
388   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
389   PetscFunctionReturn(0);
390 }
391 
392 #undef __FUNCT__
393 #define __FUNCT__ "PCBDDCResetTopography"
394 PetscErrorCode PCBDDCResetTopography(PC pc)
395 {
396   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
397   PetscErrorCode ierr;
398 
399   PetscFunctionBegin;
400   ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
401   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
402   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
403   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
404   ierr = PCBDDCSubSchursReset(pcbddc->sub_schurs);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "PCBDDCResetSolvers"
410 PetscErrorCode PCBDDCResetSolvers(PC pc)
411 {
412   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
413   PetscScalar    *array;
414   PetscErrorCode ierr;
415 
416   PetscFunctionBegin;
417   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
418   if (pcbddc->coarse_phi_B) {
419     ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr);
420     ierr = PetscFree(array);CHKERRQ(ierr);
421   }
422   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
423   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
424   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
425   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
426   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
427   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
428   if (pcbddc->local_auxmat2) {
429     ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr);
430     ierr = PetscFree(array);CHKERRQ(ierr);
431   }
432   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
433   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
434   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
435   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
436   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
437   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
438   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
439   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
440   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
441   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
442   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
443   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
444   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
445   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
446   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
447   ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
453 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
454 {
455   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
456   PC_IS          *pcis = (PC_IS*)pc->data;
457   VecType        impVecType;
458   PetscInt       n_constraints,n_R,old_size;
459   PetscErrorCode ierr;
460 
461   PetscFunctionBegin;
462   if (!pcbddc->ConstraintMatrix) {
463     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
464   }
465   /* get sizes */
466   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
467   n_R = pcis->n-pcbddc->n_actual_vertices;
468   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
469   /* local work vectors (try to avoid unneeded work)*/
470   /* R nodes */
471   old_size = -1;
472   if (pcbddc->vec1_R) {
473     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
474   }
475   if (n_R != old_size) {
476     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
477     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
478     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
479     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
480     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
481     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
482   }
483   /* local primal dofs */
484   old_size = -1;
485   if (pcbddc->vec1_P) {
486     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
487   }
488   if (pcbddc->local_primal_size != old_size) {
489     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
490     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
491     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
492     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
493   }
494   /* local explicit constraints */
495   old_size = -1;
496   if (pcbddc->vec1_C) {
497     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
498   }
499   if (n_constraints && n_constraints != old_size) {
500     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
501     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
502     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
503     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
504   }
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "PCBDDCSetUpCorrection"
510 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
511 {
512   PetscErrorCode         ierr;
513   /* pointers to pcis and pcbddc */
514   PC_IS*                 pcis = (PC_IS*)pc->data;
515   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
516   /* submatrices of local problem */
517   Mat                    A_RV,A_VR,A_VV;
518   /* submatrices of local coarse problem */
519   Mat                    S_VV,S_CV,S_VC,S_CC;
520   /* working matrices */
521   Mat                    C_CR;
522   /* additional working stuff */
523   PC                     pc_R;
524   Mat                    F;
525   PetscBool              isLU,isCHOL,isILU;
526 
527   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
528   PetscScalar            *work;
529   PetscInt               *idx_V_B;
530   PetscInt               n,n_vertices,n_constraints;
531   PetscInt               i,n_R,n_D,n_B;
532   PetscBool              unsymmetric_check;
533   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
534   MatType                impMatType;
535   /* some shortcuts to scalars */
536   PetscScalar            one=1.0,m_one=-1.0;
537 
538   PetscFunctionBegin;
539   /* get number of vertices (corners plus constraints with change of basis)
540      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
541   n_vertices = pcbddc->n_actual_vertices;
542   n_constraints = pcbddc->local_primal_size-n_vertices;
543   /* Set Non-overlapping dimensions */
544   n_B = pcis->n_B; n_D = pcis->n - n_B;
545   n_R = pcis->n-n_vertices;
546 
547   /* Set types for local objects needed by BDDC precondtioner */
548   impMatType = MATSEQDENSE;
549 
550   /* vertices in boundary numbering */
551   ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr);
552   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
553   if (i != n_vertices) {
554     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
555   }
556 
557   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
558   ierr = PetscMalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr);
559   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr);
560   ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr);
561   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr);
562   ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr);
563   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr);
564   ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr);
565   ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr);
566   ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr);
567 
568   unsymmetric_check = PETSC_FALSE;
569   /* allocate workspace */
570   n = 0;
571   if (n_constraints) {
572     n += n_R*n_constraints;
573   }
574   if (n_vertices) {
575     n = PetscMax(2*n_R*n_vertices,n);
576   }
577   if (!pcbddc->issym) {
578     n = PetscMax(2*n_R*pcbddc->local_primal_size,n);
579     unsymmetric_check = PETSC_TRUE;
580   }
581   ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
582 
583   /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
584   ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr);
585   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr);
586   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr);
587   ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr);
588   if (isLU || isILU || isCHOL) {
589     ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr);
590   } else {
591     F = NULL;
592   }
593 
594   /* Precompute stuffs needed for preprocessing and application of BDDC*/
595   if (n_constraints) {
596     Mat M1,M2,M3;
597     IS  is_aux;
598     /* see if we can save some allocations */
599     if (pcbddc->local_auxmat2) {
600       PetscInt on_R,on_constraints;
601       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
602       if (on_R != n_R || on_constraints != n_constraints) {
603         PetscScalar *marray;
604 
605         ierr = MatDenseGetArray(pcbddc->local_auxmat2,&marray);CHKERRQ(ierr);
606         ierr = PetscFree(marray);CHKERRQ(ierr);
607         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
608         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
609       }
610     }
611     /* auxiliary matrices */
612     if (!pcbddc->local_auxmat2) {
613       PetscScalar *marray;
614 
615       ierr = PetscMalloc1(2*n_R*n_constraints,&marray);CHKERRQ(ierr);
616       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,marray,&pcbddc->local_auxmat2);CHKERRQ(ierr);
617       marray += n_R*n_constraints;
618       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_R,marray,&pcbddc->local_auxmat1);CHKERRQ(ierr);
619     }
620 
621     /* Extract constraints on R nodes: C_{CR}  */
622     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
623     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
624     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
625 
626     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
627     ierr = PetscMemzero(work,n_R*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
628     for (i=0;i<n_constraints;i++) {
629       const PetscScalar *row_cmat_values;
630       const PetscInt    *row_cmat_indices;
631       PetscInt          size_of_constraint,j;
632 
633       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
634       for (j=0;j<size_of_constraint;j++) {
635         work[row_cmat_indices[j]+i*n_R] = -row_cmat_values[j];
636       }
637       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
638     }
639     if (F) {
640       Mat B;
641 
642       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
643       ierr = MatMatSolve(F,B,pcbddc->local_auxmat2);CHKERRQ(ierr);
644       ierr = MatDestroy(&B);CHKERRQ(ierr);
645     } else {
646       PetscScalar *xarray;
647       ierr = MatDenseGetArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
648       for (i=0;i<n_constraints;i++) {
649         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
650         ierr = VecPlaceArray(pcbddc->vec2_R,xarray+i*n_R);CHKERRQ(ierr);
651         ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
652         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
653         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
654       }
655       ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&xarray);CHKERRQ(ierr);
656     }
657 
658     /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1}  */
659     ierr = MatConvert(C_CR,impMatType,MAT_REUSE_MATRIX,&C_CR);CHKERRQ(ierr);
660     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
661     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr);
662     ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
663     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
664     ierr = VecSet(pcbddc->vec1_C,m_one);CHKERRQ(ierr);
665     ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr);
666     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
667     ierr = MatDestroy(&M2);CHKERRQ(ierr);
668     ierr = MatDestroy(&M3);CHKERRQ(ierr);
669     /* Assemble local_auxmat1 = S_CC*C_{CR} needed by BDDC application in KSP and in preproc */
670     ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
671     ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
672     ierr = MatDestroy(&M1);CHKERRQ(ierr);
673   }
674   /* Get submatrices from subdomain matrix */
675   if (n_vertices) {
676     Mat       newmat;
677     IS        is_aux;
678     PetscInt  ibs,mbs;
679     PetscBool issbaij;
680 
681     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
682     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
683     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
684     if (ibs != mbs) { /* need to convert to SEQAIJ */
685       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
686       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
687       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
688       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
689       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
690     } else {
691       /* this is safe */
692       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
693       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
694       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
695         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
696         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
697         ierr = MatTranspose(A_VR,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
698         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
699         ierr = MatConvert(A_VV,MATSEQBAIJ,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
700       } else {
701         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
702         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
703       }
704     }
705     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
706   }
707 
708   /* Matrix of coarse basis functions (local) */
709   if (pcbddc->coarse_phi_B) {
710     PetscInt on_B,on_primal,on_D=n_D;
711     if (pcbddc->coarse_phi_D) {
712       ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr);
713     }
714     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
715     if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
716       PetscScalar *marray;
717 
718       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr);
719       ierr = PetscFree(marray);CHKERRQ(ierr);
720       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
721       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
722       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
723       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!pcbddc->coarse_phi_B) {
728     PetscScalar *marray;
729 
730     n = n_B*pcbddc->local_primal_size;
731     if (pcbddc->switch_static || pcbddc->dbg_flag) {
732       n += n_D*pcbddc->local_primal_size;
733     }
734     if (!pcbddc->issym) {
735       n *= 2;
736     }
737     ierr = PetscCalloc1(n,&marray);CHKERRQ(ierr);
738     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
739     n = n_B*pcbddc->local_primal_size;
740     if (pcbddc->switch_static || pcbddc->dbg_flag) {
741       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
742       n += n_D*pcbddc->local_primal_size;
743     }
744     if (!pcbddc->issym) {
745       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
746       if (pcbddc->switch_static || pcbddc->dbg_flag) {
747         n = n_B*pcbddc->local_primal_size;
748         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marray+n,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
749       }
750     } else {
751       ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr);
752       pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
753       if (pcbddc->switch_static || pcbddc->dbg_flag) {
754         ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr);
755         pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
756       }
757     }
758   }
759   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
760   /* vertices */
761   if (n_vertices) {
762 
763     if (n_R) {
764       Mat          A_RRmA_RV,S_VVt; /* S_VVt with LDA=N */
765       PetscBLASInt B_N,B_one = 1;
766       PetscScalar  *x,*y;
767 
768       ierr = PetscMemzero(work,2*n_R*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
769       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr);
770       ierr = MatConvert(A_RV,impMatType,MAT_REUSE_MATRIX,&A_RV);CHKERRQ(ierr);
771       if (F) {
772         ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr);
773       } else {
774         ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr);
775         for (i=0;i<n_vertices;i++) {
776           ierr = VecPlaceArray(pcbddc->vec1_R,y+i*n_R);CHKERRQ(ierr);
777           ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr);
778           ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
779           ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
780           ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
781         }
782         ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr);
783       }
784       ierr = MatScale(A_RRmA_RV,m_one);CHKERRQ(ierr);
785       /* S_VV and S_CV are the subdomain contribution to coarse matrix. WARNING -> column major ordering */
786       if (n_constraints) {
787         Mat B;
788         ierr = MatMatMult(pcbddc->local_auxmat1,A_RRmA_RV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr);
789         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+n_R*n_vertices,&B);CHKERRQ(ierr);
790         ierr = MatMatMult(pcbddc->local_auxmat2,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
791         ierr = MatScale(S_CV,m_one);CHKERRQ(ierr);
792         ierr = PetscBLASIntCast(n_R*n_vertices,&B_N);CHKERRQ(ierr);
793         PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+n_R*n_vertices,&B_one,work,&B_one));
794         ierr = MatDestroy(&B);CHKERRQ(ierr);
795       }
796       ierr = MatConvert(A_VR,impMatType,MAT_REUSE_MATRIX,&A_VR);CHKERRQ(ierr);
797       ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr);
798       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
799       ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr);
800       ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr);
801       ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr);
802       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
803       ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr);
804       ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr);
805       ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
806       ierr = MatDestroy(&S_VVt);CHKERRQ(ierr);
807       ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr);
808     } else {
809       ierr = MatConvert(A_VV,impMatType,MAT_REUSE_MATRIX,&A_VV);CHKERRQ(ierr);
810       ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
811     }
812     /* coarse basis functions */
813     for (i=0;i<n_vertices;i++) {
814       PetscScalar *y;
815 
816       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
817       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
818       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
819       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
820       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821       y[n_B*i+idx_V_B[i]] = 1.0;
822       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
823       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
824 
825       if (pcbddc->switch_static || pcbddc->dbg_flag) {
826         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
827         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
828         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
831         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
832       }
833       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
834     }
835     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
836     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
837   }
838 
839   if (n_constraints) {
840     Mat B;
841 
842     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr);
843     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
844     ierr = MatMatMult(pcbddc->local_auxmat2,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr);
845     ierr = MatScale(S_CC,m_one);CHKERRQ(ierr);
846     if (n_vertices) {
847       ierr = MatMatMult(A_VR,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_VC);CHKERRQ(ierr);
848     }
849     ierr = MatDestroy(&B);CHKERRQ(ierr);
850     /* coarse basis functions */
851     for (i=0;i<n_constraints;i++) {
852       PetscScalar *y;
853 
854       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*i);CHKERRQ(ierr);
855       ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
856       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr);
857       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr);
860       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
861 
862       if (pcbddc->switch_static || pcbddc->dbg_flag) {
863         ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
864         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr);
865         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
868         ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr);
869       }
870       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
871     }
872   }
873 
874   /* compute other basis functions for non-symmetric problems */
875   if (!pcbddc->issym) {
876     Mat B,X;
877 
878     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work,&B);CHKERRQ(ierr);
879 
880     if (n_constraints) {
881       Mat S_CCT,B_C;
882 
883       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work+n_vertices*n_R,&B_C);CHKERRQ(ierr);
884       ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr);
885       ierr = MatTransposeMatMult(C_CR,S_CCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr);
886       ierr = MatDestroy(&S_CCT);CHKERRQ(ierr);
887       if (n_vertices) {
888         Mat B_V,S_VCT;
889 
890         ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&B_V);CHKERRQ(ierr);
891         ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr);
892         ierr = MatTransposeMatMult(C_CR,S_VCT,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr);
893         ierr = MatDestroy(&B_V);CHKERRQ(ierr);
894         ierr = MatDestroy(&S_VCT);CHKERRQ(ierr);
895       }
896       ierr = MatDestroy(&B_C);CHKERRQ(ierr);
897     }
898     if (n_vertices && n_R) {
899       Mat          A_VRT;
900       PetscBLASInt B_N,B_one = 1;
901 
902       if (!n_constraints) { /* if there are no constraints, reset work */
903         ierr = PetscMemzero(work,n_R*pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
904       }
905       ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work+pcbddc->local_primal_size*n_R,&A_VRT);CHKERRQ(ierr);
906       ierr = MatTranspose(A_VR,MAT_REUSE_MATRIX,&A_VRT);CHKERRQ(ierr);
907       ierr = PetscBLASIntCast(n_vertices*n_R,&B_N);CHKERRQ(ierr);
908       PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&m_one,work+pcbddc->local_primal_size*n_R,&B_one,work,&B_one));
909       ierr = MatDestroy(&A_VRT);CHKERRQ(ierr);
910     }
911 
912     ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,pcbddc->local_primal_size,work+pcbddc->local_primal_size*n_R,&X);CHKERRQ(ierr);
913     if (F) { /* currently there's no support for MatTransposeMatSolve(F,B,X) */
914       for (i=0;i<pcbddc->local_primal_size;i++) {
915         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
916         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
917         ierr = MatSolveTranspose(F,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
918         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
919         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
920       }
921     } else {
922       for (i=0;i<pcbddc->local_primal_size;i++) {
923         ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr);
924         ierr = VecPlaceArray(pcbddc->vec2_R,work+(i+pcbddc->local_primal_size)*n_R);CHKERRQ(ierr);
925         ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
926         ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
927         ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr);
928       }
929     }
930     ierr = MatDestroy(&B);CHKERRQ(ierr);
931     /* coarse basis functions */
932     for (i=0;i<pcbddc->local_primal_size;i++) {
933       PetscScalar *y;
934 
935       ierr = VecPlaceArray(pcbddc->vec1_R,work+n_R*(i+pcbddc->local_primal_size));CHKERRQ(ierr);
936       ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
937       ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr);
938       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       if (i<n_vertices) {
941         y[n_B*i+idx_V_B[i]] = 1.0;
942       }
943       ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr);
944       ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr);
945 
946       if (pcbddc->switch_static || pcbddc->dbg_flag) {
947         ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
948         ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr);
949         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951         ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr);
952         ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr);
953       }
954       ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr);
955     }
956     ierr = MatDestroy(&X);CHKERRQ(ierr);
957   }
958 
959   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
960   ierr = MatDestroy(&S_VV);CHKERRQ(ierr);
961   ierr = MatDestroy(&S_CV);CHKERRQ(ierr);
962   ierr = MatDestroy(&S_VC);CHKERRQ(ierr);
963   ierr = MatDestroy(&S_CC);CHKERRQ(ierr);
964   /* Checking coarse_sub_mat and coarse basis functios */
965   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
966   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
967   if (pcbddc->dbg_flag) {
968     Mat         coarse_sub_mat;
969     Mat         AUXMAT,TM1,TM2,TM3,TM4;
970     Mat         coarse_phi_D,coarse_phi_B;
971     Mat         coarse_psi_D,coarse_psi_B;
972     Mat         A_II,A_BB,A_IB,A_BI;
973     Mat         C_B,CPHI;
974     IS          is_dummy;
975     Vec         mones;
976     MatType     checkmattype=MATSEQAIJ;
977     PetscReal   real_value;
978 
979     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
980     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
981     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
982     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
983     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
984     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
985     if (unsymmetric_check) {
986       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
987       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
988     }
989     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
990 
991     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
992     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation\n");CHKERRQ(ierr);
993     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
994     if (unsymmetric_check) {
995       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
996       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
997       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
998       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
999       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1000       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1001       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1002       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1003       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1004       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1005       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1006       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1007     } else {
1008       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
1009       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
1010       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1011       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
1012       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1013       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
1014       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
1015       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
1016     }
1017     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1018     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1019     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1020     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
1021     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
1022     ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1023     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1024     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d      matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1025 
1026     /* check constraints */
1027     ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&is_dummy);CHKERRQ(ierr);
1028     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
1029     ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr);
1030     ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr);
1031     ierr = VecSet(mones,-1.0);CHKERRQ(ierr);
1032     ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr);
1033     ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr);
1034     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr);
1035     ierr = MatDestroy(&C_B);CHKERRQ(ierr);
1036     ierr = MatDestroy(&CPHI);CHKERRQ(ierr);
1037     ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
1038     ierr = VecDestroy(&mones);CHKERRQ(ierr);
1039 
1040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1041     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
1042     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
1043     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
1044     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
1045     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
1046     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
1047     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
1048     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
1049     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
1050     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
1051     if (unsymmetric_check) {
1052       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
1053       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
1054     }
1055     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
1056   }
1057 
1058   /* free memory */
1059   ierr = PetscFree(work);CHKERRQ(ierr);
1060   if (n_vertices) {
1061     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
1062   }
1063   if (n_constraints) {
1064     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
1065   }
1066   /* get back data */
1067   *coarse_submat_vals_n = coarse_submat_vals;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetSubMatrixUnsorted"
1073 PetscErrorCode MatGetSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
1074 {
1075   Mat            *work_mat;
1076   IS             isrow_s,iscol_s;
1077   PetscBool      rsorted,csorted;
1078   PetscInt       rsize,*idxs_perm_r,csize,*idxs_perm_c;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr);
1083   ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr);
1084   ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr);
1085   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1086 
1087   if (!rsorted) {
1088     const PetscInt *idxs;
1089     PetscInt *idxs_sorted,i;
1090 
1091     ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr);
1092     ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr);
1093     for (i=0;i<rsize;i++) {
1094       idxs_perm_r[i] = i;
1095     }
1096     ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr);
1097     ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr);
1098     for (i=0;i<rsize;i++) {
1099       idxs_sorted[i] = idxs[idxs_perm_r[i]];
1100     }
1101     ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr);
1102     ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
1105     isrow_s = isrow;
1106   }
1107 
1108   if (!csorted) {
1109     if (isrow == iscol) {
1110       ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr);
1111       iscol_s = isrow_s;
1112     } else {
1113       const PetscInt *idxs;
1114       PetscInt *idxs_sorted,i;
1115 
1116       ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr);
1117       ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr);
1118       for (i=0;i<csize;i++) {
1119         idxs_perm_c[i] = i;
1120       }
1121       ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr);
1122       ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr);
1123       for (i=0;i<csize;i++) {
1124         idxs_sorted[i] = idxs[idxs_perm_c[i]];
1125       }
1126       ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr);
1127       ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr);
1128     }
1129   } else {
1130     ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
1131     iscol_s = iscol;
1132   }
1133 
1134   ierr = MatGetSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1135 
1136   if (!rsorted || !csorted) {
1137     Mat      new_mat;
1138     IS       is_perm_r,is_perm_c;
1139 
1140     if (!rsorted) {
1141       PetscInt *idxs_r,i;
1142       ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr);
1143       for (i=0;i<rsize;i++) {
1144         idxs_r[idxs_perm_r[i]] = i;
1145       }
1146       ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr);
1147       ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr);
1148     } else {
1149       ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr);
1150     }
1151     ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr);
1152 
1153     if (!csorted) {
1154       if (isrow_s == iscol_s) {
1155         ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr);
1156         is_perm_c = is_perm_r;
1157       } else {
1158         PetscInt *idxs_c,i;
1159         ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr);
1160         for (i=0;i<csize;i++) {
1161           idxs_c[idxs_perm_c[i]] = i;
1162         }
1163         ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr);
1164         ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr);
1165       }
1166     } else {
1167       ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr);
1168     }
1169     ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr);
1170 
1171     ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr);
1172     ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr);
1173     work_mat[0] = new_mat;
1174     ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr);
1175     ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr);
1176   }
1177 
1178   ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr);
1179   *B = work_mat[0];
1180   ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr);
1181   ierr = ISDestroy(&isrow_s);CHKERRQ(ierr);
1182   ierr = ISDestroy(&iscol_s);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "PCBDDCComputeLocalMatrix"
1188 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
1189 {
1190   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
1191   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1192   Mat            new_mat;
1193   IS             is_local,is_global;
1194   PetscInt       local_size;
1195   PetscBool      isseqaij;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
1200   ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr);
1201   ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr);
1202   ierr = ISLocalToGlobalMappingApplyIS(matis->mapping,is_local,&is_global);CHKERRQ(ierr);
1203   ierr = ISDestroy(&is_local);CHKERRQ(ierr);
1204   ierr = MatGetSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr);
1205   ierr = ISDestroy(&is_global);CHKERRQ(ierr);
1206 
1207   /* check */
1208   if (pcbddc->dbg_flag) {
1209     Vec       x,x_change;
1210     PetscReal error;
1211 
1212     ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr);
1213     ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
1214     ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr);
1215     ierr = VecScatterBegin(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216     ierr = VecScatterEnd(matis->ctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1217     ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1220     ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
1221     ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
1222     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1223     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change on N: %1.6e\n",error);CHKERRQ(ierr);
1224     ierr = VecDestroy(&x);CHKERRQ(ierr);
1225     ierr = VecDestroy(&x_change);CHKERRQ(ierr);
1226   }
1227 
1228   /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
1229   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr);
1230   if (isseqaij) {
1231     ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1232   } else {
1233     Mat work_mat;
1234     ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
1235     ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
1236     ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
1237   }
1238   ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
1239 #if !defined(PETSC_USE_COMPLEX)
1240   ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,pcbddc->issym);CHKERRQ(ierr);
1241 #endif
1242   /*
1243   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1244   ierr = MatView(new_mat,(PetscViewer)0);CHKERRQ(ierr);
1245   */
1246   ierr = MatDestroy(&new_mat);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
1252 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
1253 {
1254   PC_IS*         pcis = (PC_IS*)(pc->data);
1255   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
1256   IS             is_aux1,is_aux2;
1257   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
1258   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
1259   PetscInt       vbs,bs;
1260   PetscBT        bitmask;
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   /*
1265     No need to setup local scatters if
1266       - primal space is unchanged
1267         AND
1268       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
1269         AND
1270       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
1271   */
1272   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
1273     PetscFunctionReturn(0);
1274   }
1275   /* destroy old objects */
1276   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
1277   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
1278   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
1279   /* Set Non-overlapping dimensions */
1280   n_B = pcis->n_B; n_D = pcis->n - n_B;
1281   n_vertices = pcbddc->n_actual_vertices;
1282   /* create auxiliary bitmask */
1283   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
1284   for (i=0;i<n_vertices;i++) {
1285     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
1286   }
1287 
1288   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
1289   ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr);
1290   for (i=0, n_R=0; i<pcis->n; i++) {
1291     if (!PetscBTLookup(bitmask,i)) {
1292       idx_R_local[n_R] = i;
1293       n_R++;
1294     }
1295   }
1296 
1297   /* Block code */
1298   vbs = 1;
1299   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
1300   if (bs>1 && !(n_vertices%bs)) {
1301     PetscBool is_blocked = PETSC_TRUE;
1302     PetscInt  *vary;
1303     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
1304     ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr);
1305     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
1306     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
1307     for (i=0; i<n_vertices/bs; i++) {
1308       if (vary[i]!=0 && vary[i]!=bs) {
1309         is_blocked = PETSC_FALSE;
1310         break;
1311       }
1312     }
1313     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
1314       vbs = bs;
1315       for (i=0;i<n_R/vbs;i++) {
1316         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
1317       }
1318     }
1319     ierr = PetscFree(vary);CHKERRQ(ierr);
1320   }
1321   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
1322   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
1323 
1324   /* print some info if requested */
1325   if (pcbddc->dbg_flag) {
1326     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1327     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1328     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1329     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
1330     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
1331     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
1332     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
1333     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1334   }
1335 
1336   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
1337   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1338   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr);
1339   ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr);
1340   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1341   for (i=0; i<n_D; i++) {
1342     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
1343   }
1344   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1345   for (i=0, j=0; i<n_R; i++) {
1346     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
1347       aux_array1[j++] = i;
1348     }
1349   }
1350   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1351   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1352   for (i=0, j=0; i<n_B; i++) {
1353     if (!PetscBTLookup(bitmask,is_indices[i])) {
1354       aux_array2[j++] = i;
1355     }
1356   }
1357   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1358   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
1359   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
1360   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1361   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
1362 
1363   if (pcbddc->switch_static || pcbddc->dbg_flag) {
1364     ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr);
1365     for (i=0, j=0; i<n_R; i++) {
1366       if (PetscBTLookup(bitmask,idx_R_local[i])) {
1367         aux_array1[j++] = i;
1368       }
1369     }
1370     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
1371     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
1372     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
1373   }
1374   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
1375   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
1382 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
1383 {
1384   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1385   PC_IS          *pcis = (PC_IS*)pc->data;
1386   PC             pc_temp;
1387   Mat            A_RR;
1388   MatReuse       reuse;
1389   PetscScalar    m_one = -1.0;
1390   PetscReal      value;
1391   PetscInt       n_D,n_R,ibs,mbs;
1392   PetscBool      use_exact,use_exact_reduced,issbaij;
1393   PetscErrorCode ierr;
1394   /* prefixes stuff */
1395   char           dir_prefix[256],neu_prefix[256],str_level[16];
1396   size_t         len;
1397 
1398   PetscFunctionBegin;
1399 
1400   /* compute prefixes */
1401   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1402   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1403   if (!pcbddc->current_level) {
1404     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1405     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1406     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1407     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1408   } else {
1409     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1410     sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
1411     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1412     len -= 15; /* remove "pc_bddc_coarse_" */
1413     if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
1414     if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
1415     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1416     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
1417     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1418     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1419     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1420     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1421   }
1422 
1423   /* DIRICHLET PROBLEM */
1424   if (dirichlet) {
1425     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
1426     if (pcbddc->issym) {
1427       ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1428     }
1429     /* Matrix for Dirichlet problem is pcis->A_II */
1430     n_D = pcis->n - pcis->n_B;
1431     if (!pcbddc->ksp_D) { /* create object if not yet build */
1432       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1433       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1434       /* default */
1435       ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1436       ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1437       ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1438       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1439       if (issbaij) {
1440         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1441       } else {
1442         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1443       }
1444       /* Allow user's customization */
1445       ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1446       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1447     }
1448     ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr);
1449     if (sub_schurs->interior_solver) {
1450       ierr = KSPSetPC(pcbddc->ksp_D,sub_schurs->interior_solver);CHKERRQ(ierr);
1451     }
1452     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1453     if (!n_D) {
1454       ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1455       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1456     }
1457     /* Set Up KSP for Dirichlet problem of BDDC */
1458     ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1459     /* set ksp_D into pcis data */
1460     ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1461     ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1462     pcis->ksp_D = pcbddc->ksp_D;
1463   }
1464 
1465   /* NEUMANN PROBLEM */
1466   A_RR = 0;
1467   if (neumann) {
1468     /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1469     ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1470     if (pcbddc->ksp_R) { /* already created ksp */
1471       PetscInt nn_R;
1472       ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr);
1473       ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1474       ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1475       if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1476         ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1477         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1478         reuse = MAT_INITIAL_MATRIX;
1479       } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1480         if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1481           ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1482           reuse = MAT_INITIAL_MATRIX;
1483         } else { /* safe to reuse the matrix */
1484           reuse = MAT_REUSE_MATRIX;
1485         }
1486       }
1487       /* last check */
1488       if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1489         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1490         reuse = MAT_INITIAL_MATRIX;
1491       }
1492     } else { /* first time, so we need to create the matrix */
1493       reuse = MAT_INITIAL_MATRIX;
1494     }
1495     /* extract A_RR */
1496     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1497     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1498     if (ibs != mbs) {
1499       Mat newmat;
1500       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1501       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1502       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1503     } else {
1504       ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1505     }
1506     if (pcbddc->issym) {
1507       ierr = MatSetOption(A_RR,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
1508     }
1509     if (!pcbddc->ksp_R) { /* create object if not present */
1510       ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1511       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1512       /* default */
1513       ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1514       ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1515       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1516       ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1517       if (issbaij) {
1518         ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1519       } else {
1520         ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1521       }
1522       /* Allow user's customization */
1523       ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1524       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1525     }
1526     ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr);
1527     /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1528     if (!n_R) {
1529       ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1530       ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1531     }
1532     /* Set Up KSP for Neumann problem of BDDC */
1533     ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1534   }
1535 
1536   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1537   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1538     if (pcbddc->dbg_flag) {
1539       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1540       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1541       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1542     }
1543     if (dirichlet) { /* Dirichlet */
1544       ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1545       ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1546       ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1547       ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1548       ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1549       /* need to be adapted? */
1550       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1551       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1552       ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1553       /* print info */
1554       if (pcbddc->dbg_flag) {
1555         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1556         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1557       }
1558       if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1559         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1560       }
1561     }
1562     if (neumann) { /* Neumann */
1563       ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1564       ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1565       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1566       ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1567       ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1568       /* need to be adapted? */
1569       use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1570       ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1571       /* print info */
1572       if (pcbddc->dbg_flag) {
1573         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1574         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1575       }
1576       if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1577         ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1578       }
1579     }
1580   }
1581   /* free Neumann problem's matrix */
1582   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "PCBDDCSolveSubstructureCorrection"
1588 static PetscErrorCode  PCBDDCSolveSubstructureCorrection(PC pc, Vec rhs, Vec sol, Vec work, PetscBool applytranspose)
1589 {
1590   PetscErrorCode ierr;
1591   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1592 
1593   PetscFunctionBegin;
1594   if (applytranspose) {
1595     if (pcbddc->local_auxmat1) {
1596       ierr = MatMultTranspose(pcbddc->local_auxmat2,rhs,work);CHKERRQ(ierr);
1597       ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,work,rhs,rhs);CHKERRQ(ierr);
1598     }
1599     ierr = KSPSolveTranspose(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1600   } else {
1601     ierr = KSPSolve(pcbddc->ksp_R,rhs,sol);CHKERRQ(ierr);
1602     if (pcbddc->local_auxmat1) {
1603       ierr = MatMult(pcbddc->local_auxmat1,sol,work);CHKERRQ(ierr);
1604       ierr = MatMultAdd(pcbddc->local_auxmat2,work,sol,sol);CHKERRQ(ierr);
1605     }
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
1611 #undef __FUNCT__
1612 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1613 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
1614 {
1615   PetscErrorCode ierr;
1616   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1617   PC_IS*            pcis = (PC_IS*)  (pc->data);
1618   const PetscScalar zero = 0.0;
1619 
1620   PetscFunctionBegin;
1621   /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
1622   if (applytranspose) {
1623     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1624     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1625   } else {
1626     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1627     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1628   }
1629   /* start communications from local primal nodes to rhs of coarse solver */
1630   ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr);
1631   ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1632   ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633 
1634   /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
1635   /* TODO remove null space when doing multilevel */
1636   if (pcbddc->coarse_ksp) {
1637     Vec rhs,sol;
1638 
1639     ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr);
1640     ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr);
1641     if (applytranspose) {
1642       ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1643     } else {
1644       ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr);
1645     }
1646   }
1647 
1648   /* Local solution on R nodes */
1649   if (pcis->n) {
1650     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1651     ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1652     ierr = VecScatterEnd(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1653     if (pcbddc->switch_static) {
1654       ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1655       ierr = VecScatterEnd(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1656     }
1657     ierr = PCBDDCSolveSubstructureCorrection(pc,pcbddc->vec1_R,pcbddc->vec2_R,pcbddc->vec1_C,applytranspose);CHKERRQ(ierr);
1658     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1659     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (pcbddc->switch_static) {
1662       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1663       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1664     }
1665   }
1666 
1667   /* communications from coarse sol to local primal nodes */
1668   ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1669   ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1670 
1671   /* Sum contributions from two levels */
1672   if (applytranspose) {
1673     ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1674     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1675   } else {
1676     ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1677     if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1684 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
1685 {
1686   PetscErrorCode ierr;
1687   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1688   PetscScalar    *array;
1689   Vec            from,to;
1690 
1691   PetscFunctionBegin;
1692   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1693     from = pcbddc->coarse_vec;
1694     to = pcbddc->vec1_P;
1695     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1696       Vec tvec;
1697 
1698       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1699       ierr = VecResetArray(tvec);CHKERRQ(ierr);
1700       ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1701       ierr = VecGetArray(tvec,&array);CHKERRQ(ierr);
1702       ierr = VecPlaceArray(from,array);CHKERRQ(ierr);
1703       ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr);
1704     }
1705   } else { /* from local to global -> put data in coarse right hand side */
1706     from = pcbddc->vec1_P;
1707     to = pcbddc->coarse_vec;
1708   }
1709   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1715 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
1716 {
1717   PetscErrorCode ierr;
1718   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1719   PetscScalar    *array;
1720   Vec            from,to;
1721 
1722   PetscFunctionBegin;
1723   if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
1724     from = pcbddc->coarse_vec;
1725     to = pcbddc->vec1_P;
1726   } else { /* from local to global -> put data in coarse right hand side */
1727     from = pcbddc->vec1_P;
1728     to = pcbddc->coarse_vec;
1729   }
1730   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr);
1731   if (smode == SCATTER_FORWARD) {
1732     if (pcbddc->coarse_ksp) { /* get array from coarse processes */
1733       Vec tvec;
1734 
1735       ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr);
1736       ierr = VecGetArray(to,&array);CHKERRQ(ierr);
1737       ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr);
1738       ierr = VecRestoreArray(to,&array);CHKERRQ(ierr);
1739     }
1740   } else {
1741     if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
1742      ierr = VecResetArray(from);CHKERRQ(ierr);
1743     }
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 /* uncomment for testing purposes */
1749 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1750 #undef __FUNCT__
1751 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1752 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1753 {
1754   PetscErrorCode    ierr;
1755   PC_IS*            pcis = (PC_IS*)(pc->data);
1756   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1757   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1758   /* one and zero */
1759   PetscScalar       one=1.0,zero=0.0;
1760   /* space to store constraints and their local indices */
1761   PetscScalar       *temp_quadrature_constraint;
1762   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1763   /* iterators */
1764   PetscInt          i,j,k,total_counts,temp_start_ptr;
1765   /* BLAS integers */
1766   PetscBLASInt      lwork,lierr;
1767   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1768   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1769   /* reuse */
1770   PetscInt          olocal_primal_size;
1771   PetscInt          *oprimal_indices_local_idxs;
1772   /* change of basis */
1773   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1774   PetscBool         boolforchange,qr_needed;
1775   PetscBT           touched,change_basis,qr_needed_idx;
1776   /* auxiliary stuff */
1777   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1778   PetscInt          ncc,*gidxs=NULL,*permutation=NULL,*temp_indices_to_constraint_work=NULL;
1779   PetscScalar       *temp_quadrature_constraint_work=NULL;
1780   /* some quantities */
1781   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1782   PetscInt          size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
1783 
1784   PetscFunctionBegin;
1785   /* Destroy Mat objects computed previously */
1786   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1787   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1788 
1789   /* print some info */
1790   if (pcbddc->dbg_flag) {
1791     IS       vertices;
1792     PetscInt nv,nedges,nfaces;
1793     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr);
1794     ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr);
1795     ierr = ISDestroy(&vertices);CHKERRQ(ierr);
1796     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1798     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr);
1799     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges    (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr);
1800     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces    (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr);
1801     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1802   }
1803 
1804   if (!pcbddc->adaptive_selection) {
1805     IS           ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1806     MatNullSpace nearnullsp;
1807     const Vec    *nearnullvecs;
1808     Vec          *localnearnullsp;
1809     PetscScalar  *array;
1810     PetscInt     n_ISForFaces,n_ISForEdges,nnsp_size;
1811     PetscBool    nnsp_has_cnst;
1812     /* LAPACK working arrays for SVD or POD */
1813     PetscBool    skip_lapack;
1814     PetscScalar  *work;
1815     PetscReal    *singular_vals;
1816 #if defined(PETSC_USE_COMPLEX)
1817     PetscReal    *rwork;
1818 #endif
1819 #if defined(PETSC_MISSING_LAPACK_GESVD)
1820     PetscScalar  *temp_basis,*correlation_mat;
1821 #else
1822     PetscBLASInt dummy_int=1;
1823     PetscScalar  dummy_scalar=1.;
1824 #endif
1825 
1826     /* Get index sets for faces, edges and vertices from graph */
1827     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr);
1828     /* free unneeded index sets */
1829     if (!pcbddc->use_vertices) {
1830       ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1831     }
1832     if (!pcbddc->use_edges) {
1833       for (i=0;i<n_ISForEdges;i++) {
1834         ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1835       }
1836       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1837       n_ISForEdges = 0;
1838     }
1839     if (!pcbddc->use_faces) {
1840       for (i=0;i<n_ISForFaces;i++) {
1841         ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1842       }
1843       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1844       n_ISForFaces = 0;
1845     }
1846     /* HACKS (the following two blocks of code) */
1847     if (!ISForVertices && pcbddc->NullSpace && !pcbddc->user_ChangeOfBasisMatrix) {
1848       pcbddc->use_change_of_basis = PETSC_TRUE;
1849       if (!ISForEdges) {
1850         pcbddc->use_change_on_faces = PETSC_TRUE;
1851       }
1852     }
1853     if (pcbddc->NullSpace) {
1854       /* use_change_of_basis should be consistent among processors */
1855       PetscBool tbool[2],gbool[2];
1856       tbool[0] = pcbddc->use_change_of_basis;
1857       tbool[1] = pcbddc->use_change_on_faces;
1858       ierr = MPI_Allreduce(tbool,gbool,2,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1859       pcbddc->use_change_of_basis = gbool[0];
1860       pcbddc->use_change_on_faces = gbool[1];
1861     }
1862 
1863     /* check if near null space is attached to global mat */
1864     ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1865     if (nearnullsp) {
1866       ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1867       /* remove any stored info */
1868       ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1869       ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1870       /* store information for BDDC solver reuse */
1871       ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1872       pcbddc->onearnullspace = nearnullsp;
1873       ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1874       for (i=0;i<nnsp_size;i++) {
1875         ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1876       }
1877     } else { /* if near null space is not provided BDDC uses constants by default */
1878       nnsp_size = 0;
1879       nnsp_has_cnst = PETSC_TRUE;
1880     }
1881     /* get max number of constraints on a single cc */
1882     max_constraints = nnsp_size;
1883     if (nnsp_has_cnst) max_constraints++;
1884 
1885     /*
1886          Evaluate maximum storage size needed by the procedure
1887          - temp_indices will contain start index of each constraint stored as follows
1888          - temp_indices_to_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1889          - temp_indices_to_constraint_B[temp_indices[i],...,temp_indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1890          - temp_quadrature_constraint  [temp_indices[i],...,temp_indices[i+1]-1] will contain the scalars representing the constraint itself
1891                                                                                                                                                            */
1892     total_counts = n_ISForFaces+n_ISForEdges;
1893     total_counts *= max_constraints;
1894     n_vertices = 0;
1895     if (ISForVertices) {
1896       ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1897     }
1898     total_counts += n_vertices;
1899     ierr = PetscMalloc1(total_counts+1,&temp_indices);CHKERRQ(ierr);
1900     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1901     total_counts = 0;
1902     max_size_of_constraint = 0;
1903     for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1904       if (i<n_ISForEdges) {
1905         used_IS = &ISForEdges[i];
1906       } else {
1907         used_IS = &ISForFaces[i-n_ISForEdges];
1908       }
1909       ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1910       total_counts += j;
1911       max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1912     }
1913     total_counts *= max_constraints;
1914     total_counts += n_vertices;
1915     ierr = PetscMalloc3(total_counts,&temp_quadrature_constraint,total_counts,&temp_indices_to_constraint,total_counts,&temp_indices_to_constraint_B);CHKERRQ(ierr);
1916     /* get local part of global near null space vectors */
1917     ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr);
1918     for (k=0;k<nnsp_size;k++) {
1919       ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1920       ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1921       ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1922     }
1923 
1924     /* whether or not to skip lapack calls */
1925     skip_lapack = PETSC_TRUE;
1926     if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
1927 
1928     /* allocate some auxiliary stuff */
1929     if (!skip_lapack || pcbddc->use_qr_single) {
1930       ierr = PetscMalloc4(max_size_of_constraint,&gidxs,max_size_of_constraint,&permutation,max_size_of_constraint,&temp_indices_to_constraint_work,max_size_of_constraint,&temp_quadrature_constraint_work);CHKERRQ(ierr);
1931     } else {
1932       gidxs = NULL;
1933       permutation = NULL;
1934       temp_indices_to_constraint_work = NULL;
1935       temp_quadrature_constraint_work = NULL;
1936     }
1937 
1938     /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1939     if (!skip_lapack) {
1940       PetscScalar temp_work;
1941 
1942 #if defined(PETSC_MISSING_LAPACK_GESVD)
1943       /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1944       ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr);
1945       ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr);
1946       ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr);
1947 #if defined(PETSC_USE_COMPLEX)
1948       ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr);
1949 #endif
1950       /* now we evaluate the optimal workspace using query with lwork=-1 */
1951       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1952       ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1953       lwork = -1;
1954       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1955 #if !defined(PETSC_USE_COMPLEX)
1956       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1957 #else
1958       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1959 #endif
1960       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1961       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1962 #else /* on missing GESVD */
1963       /* SVD */
1964       PetscInt max_n,min_n;
1965       max_n = max_size_of_constraint;
1966       min_n = max_constraints;
1967       if (max_size_of_constraint < max_constraints) {
1968         min_n = max_size_of_constraint;
1969         max_n = max_constraints;
1970       }
1971       ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr);
1972 #if defined(PETSC_USE_COMPLEX)
1973       ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr);
1974 #endif
1975       /* now we evaluate the optimal workspace using query with lwork=-1 */
1976       lwork = -1;
1977       ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1978       ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1979       ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1980       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1981 #if !defined(PETSC_USE_COMPLEX)
1982       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
1983 #else
1984       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
1985 #endif
1986       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1987       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1988 #endif /* on missing GESVD */
1989       /* Allocate optimal workspace */
1990       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1991       ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
1992     }
1993     /* Now we can loop on constraining sets */
1994     total_counts = 0;
1995     temp_indices[0] = 0;
1996     /* vertices */
1997     if (ISForVertices) {
1998       ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1999       if (nnsp_has_cnst) { /* consider all vertices */
2000         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
2001         for (i=0;i<n_vertices;i++) {
2002           temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2003           temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2004           total_counts++;
2005         }
2006       } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
2007         PetscBool used_vertex;
2008         for (i=0;i<n_vertices;i++) {
2009           used_vertex = PETSC_FALSE;
2010           k = 0;
2011           while (!used_vertex && k<nnsp_size) {
2012             ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2013             if (PetscAbsScalar(array[is_indices[i]])>0.0) {
2014               temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
2015               temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
2016               temp_indices[total_counts+1]=temp_indices[total_counts]+1;
2017               total_counts++;
2018               used_vertex = PETSC_TRUE;
2019             }
2020             ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2021             k++;
2022           }
2023         }
2024       }
2025       ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2026       n_vertices = total_counts;
2027     }
2028 
2029     /* edges and faces */
2030     for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
2031       if (ncc<n_ISForEdges) {
2032         used_IS = &ISForEdges[ncc];
2033         boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
2034       } else {
2035         used_IS = &ISForFaces[ncc-n_ISForEdges];
2036         boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
2037       }
2038       temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
2039       temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
2040       ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
2041       ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2042       /* change of basis should not be performed on local periodic nodes */
2043       if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
2044       if (nnsp_has_cnst) {
2045         PetscScalar quad_value;
2046         temp_constraints++;
2047         if (!pcbddc->use_nnsp_true) {
2048           quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
2049         } else {
2050           quad_value = 1.0;
2051         }
2052         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2053         for (j=0;j<size_of_constraint;j++) {
2054           temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
2055         }
2056         /* sort by global ordering if using lapack subroutines (not needed!) */
2057         if (!skip_lapack || pcbddc->use_qr_single) {
2058           ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2059           for (j=0;j<size_of_constraint;j++) {
2060             permutation[j]=j;
2061           }
2062           ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2063           for (j=0;j<size_of_constraint;j++) {
2064             if (permutation[j]!=j) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This should not happen");
2065           }
2066           for (j=0;j<size_of_constraint;j++) {
2067             temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2068             temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2069           }
2070           ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2071           ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2072         }
2073         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2074         total_counts++;
2075       }
2076       for (k=0;k<nnsp_size;k++) {
2077         PetscReal real_value;
2078         ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2079         ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2080         for (j=0;j<size_of_constraint;j++) {
2081           temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
2082         }
2083         ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
2084         /* check if array is null on the connected component */
2085         ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2086         PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
2087         if (real_value > 0.0) { /* keep indices and values */
2088           /* sort by global ordering if using lapack subroutines */
2089           if (!skip_lapack || pcbddc->use_qr_single) {
2090             ierr = ISLocalToGlobalMappingApply(matis->mapping,size_of_constraint,temp_indices_to_constraint+temp_indices[total_counts],gidxs);CHKERRQ(ierr);
2091             for (j=0;j<size_of_constraint;j++) {
2092               permutation[j]=j;
2093             }
2094             ierr = PetscSortIntWithPermutation(size_of_constraint,gidxs,permutation);CHKERRQ(ierr);
2095             for (j=0;j<size_of_constraint;j++) {
2096               temp_indices_to_constraint_work[j] = temp_indices_to_constraint[temp_indices[total_counts]+permutation[j]];
2097               temp_quadrature_constraint_work[j] = temp_quadrature_constraint[temp_indices[total_counts]+permutation[j]];
2098             }
2099             ierr = PetscMemcpy(temp_indices_to_constraint+temp_indices[total_counts],temp_indices_to_constraint_work,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
2100             ierr = PetscMemcpy(temp_quadrature_constraint+temp_indices[total_counts],temp_quadrature_constraint_work,size_of_constraint*sizeof(PetscScalar));CHKERRQ(ierr);
2101           }
2102           temp_constraints++;
2103           temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
2104           total_counts++;
2105         }
2106       }
2107       ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2108       valid_constraints = temp_constraints;
2109       if (!pcbddc->use_nnsp_true && temp_constraints) {
2110         if (temp_constraints == 1) { /* just normalize the constraint */
2111           PetscScalar norm;
2112           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2113           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2114           norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
2115           PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,temp_quadrature_constraint+temp_indices[temp_start_ptr],&Blas_one));
2116         } else { /* perform SVD */
2117           PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
2118 
2119 #if defined(PETSC_MISSING_LAPACK_GESVD)
2120           /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
2121              POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
2122              -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
2123                 the constraints basis will differ (by a complex factor with absolute value equal to 1)
2124                 from that computed using LAPACKgesvd
2125              -> This is due to a different computation of eigenvectors in LAPACKheev
2126              -> The quality of the POD-computed basis will be the same */
2127           ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
2128           /* Store upper triangular part of correlation matrix */
2129           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2130           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2131           for (j=0;j<temp_constraints;j++) {
2132             for (k=0;k<j+1;k++) {
2133               PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one));
2134             }
2135           }
2136           /* compute eigenvalues and eigenvectors of correlation matrix */
2137           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2138           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
2139 #if !defined(PETSC_USE_COMPLEX)
2140           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
2141 #else
2142           PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
2143 #endif
2144           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2145           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
2146           /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
2147           j = 0;
2148           while (j < temp_constraints && singular_vals[j] < tol) j++;
2149           total_counts = total_counts-j;
2150           valid_constraints = temp_constraints-j;
2151           /* scale and copy POD basis into used quadrature memory */
2152           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2153           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2154           ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
2155           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2156           ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
2157           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2158           if (j<temp_constraints) {
2159             PetscInt ii;
2160             for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
2161             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2162             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
2163             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2164             for (k=0;k<temp_constraints-j;k++) {
2165               for (ii=0;ii<size_of_constraint;ii++) {
2166                 temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
2167               }
2168             }
2169           }
2170 #else  /* on missing GESVD */
2171           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2172           ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
2173           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2174           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2175 #if !defined(PETSC_USE_COMPLEX)
2176           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
2177 #else
2178           PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
2179 #endif
2180           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
2181           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2182           /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
2183           k = temp_constraints;
2184           if (k > size_of_constraint) k = size_of_constraint;
2185           j = 0;
2186           while (j < k && singular_vals[k-j-1] < tol) j++;
2187           valid_constraints = k-j;
2188           total_counts = total_counts-temp_constraints+valid_constraints;
2189 #endif /* on missing GESVD */
2190         }
2191       }
2192       /* setting change_of_basis flag is safe now */
2193       if (boolforchange) {
2194         for (j=0;j<valid_constraints;j++) {
2195           PetscBTSet(change_basis,total_counts-j-1);
2196         }
2197       }
2198     }
2199     /* free workspace */
2200     if (!skip_lapack || pcbddc->use_qr_single) {
2201       ierr = PetscFree4(gidxs,permutation,temp_indices_to_constraint_work,temp_quadrature_constraint_work);CHKERRQ(ierr);
2202     }
2203     if (!skip_lapack) {
2204       ierr = PetscFree(work);CHKERRQ(ierr);
2205 #if defined(PETSC_USE_COMPLEX)
2206       ierr = PetscFree(rwork);CHKERRQ(ierr);
2207 #endif
2208       ierr = PetscFree(singular_vals);CHKERRQ(ierr);
2209 #if defined(PETSC_MISSING_LAPACK_GESVD)
2210       ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
2211       ierr = PetscFree(temp_basis);CHKERRQ(ierr);
2212 #endif
2213     }
2214     for (k=0;k<nnsp_size;k++) {
2215       ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
2216     }
2217     ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
2218     /* free index sets of faces, edges and vertices */
2219     for (i=0;i<n_ISForFaces;i++) {
2220       ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
2221     }
2222     if (n_ISForFaces) {
2223       ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
2224     }
2225     for (i=0;i<n_ISForEdges;i++) {
2226       ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
2227     }
2228     if (n_ISForEdges) {
2229       ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
2230     }
2231     ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
2232   } else {
2233     PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
2234     PetscInt        cum = 0;
2235 
2236     total_counts = 0;
2237     n_vertices = 0;
2238     if (sub_schurs->is_Ej_com && pcbddc->use_vertices) {
2239       ierr = ISGetLocalSize(sub_schurs->is_Ej_com,&n_vertices);CHKERRQ(ierr);
2240     }
2241     max_constraints = 0;
2242     for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
2243       total_counts += pcbddc->adaptive_constraints_n[i];
2244       max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
2245     }
2246     temp_indices = pcbddc->adaptive_constraints_ptrs;
2247     temp_indices_to_constraint = pcbddc->adaptive_constraints_idxs;
2248     temp_quadrature_constraint = pcbddc->adaptive_constraints_data;
2249 
2250 #if 0
2251     printf("Found %d totals\n",total_counts);
2252     for (i=0;i<total_counts;i++) {
2253       printf("const %d, start %d",i,temp_indices[i]);
2254       printf(" end %d:\n",temp_indices[i+1]);
2255       for (j=temp_indices[i];j<temp_indices[i+1];j++) {
2256         printf("  idxs %d",temp_indices_to_constraint[j]);
2257         printf("  data %1.2e\n",temp_quadrature_constraint[j]);
2258       }
2259     }
2260     for (i=0;i<n_vertices;i++) {
2261       PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]);
2262     }
2263     for (i=0;i<sub_schurs->n_subs;i++) {
2264       PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]);
2265     }
2266 #endif
2267 
2268     max_size_of_constraint = 0;
2269     for (i=0;i<total_counts;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,temp_indices[i+1]-temp_indices[i]);
2270     ierr = PetscMalloc1(temp_indices[total_counts],&temp_indices_to_constraint_B);CHKERRQ(ierr);
2271     /* Change of basis */
2272     ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
2273     if (pcbddc->use_change_of_basis) {
2274       cum = n_vertices;
2275       for (i=0;i<sub_schurs->n_subs;i++) {
2276         if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
2277           for (j=0;j<pcbddc->adaptive_constraints_n[i+n_vertices];j++) {
2278             ierr = PetscBTSet(change_basis,cum+j);CHKERRQ(ierr);
2279           }
2280         }
2281         cum += pcbddc->adaptive_constraints_n[i+n_vertices];
2282       }
2283     }
2284   }
2285 
2286   /* map temp_indices_to_constraint in boundary numbering */
2287   ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
2288   if (i != temp_indices[total_counts]) {
2289     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
2290   }
2291 
2292   /* set quantities in pcbddc data structure and store previous primal size */
2293   /* n_vertices defines the number of subdomain corners in the primal space */
2294   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
2295   olocal_primal_size = pcbddc->local_primal_size;
2296   pcbddc->local_primal_size = total_counts;
2297   pcbddc->n_vertices = n_vertices;
2298   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
2299 
2300   /* Create constraint matrix */
2301   /* The constraint matrix is used to compute the l2g map of primal dofs */
2302   /* so we need to set it up properly either with or without change of basis */
2303   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
2304   ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr);
2305   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
2306   /* array to compute a local numbering of constraints : vertices first then constraints */
2307   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_numbering);CHKERRQ(ierr);
2308   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
2309   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
2310   ierr = PetscMalloc1(pcbddc->local_primal_size,&aux_primal_minloc);CHKERRQ(ierr);
2311   /* auxiliary stuff for basis change */
2312   ierr = PetscMalloc1(max_size_of_constraint,&global_indices);CHKERRQ(ierr);
2313   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
2314 
2315   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
2316   total_primal_vertices=0;
2317   for (i=0;i<pcbddc->local_primal_size;i++) {
2318     size_of_constraint=temp_indices[i+1]-temp_indices[i];
2319     if (size_of_constraint == 1) {
2320       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
2321       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
2322       aux_primal_minloc[total_primal_vertices]=0;
2323       total_primal_vertices++;
2324     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
2325       PetscInt min_loc,min_index;
2326       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
2327       /* find first untouched local node */
2328       k = 0;
2329       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
2330       min_index = global_indices[k];
2331       min_loc = k;
2332       /* search the minimum among global nodes already untouched on the cc */
2333       for (k=1;k<size_of_constraint;k++) {
2334         /* there can be more than one constraint on a single connected component */
2335         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
2336           min_index = global_indices[k];
2337           min_loc = k;
2338         }
2339       }
2340       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
2341       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
2342       aux_primal_minloc[total_primal_vertices]=min_loc;
2343       total_primal_vertices++;
2344     }
2345   }
2346   /* determine if a QR strategy is needed for change of basis */
2347   qr_needed = PETSC_FALSE;
2348   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
2349   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2350     if (PetscBTLookup(change_basis,i)) {
2351       if (!pcbddc->use_qr_single && !pcbddc->faster_deluxe) {
2352         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2353         j = 0;
2354         for (k=0;k<size_of_constraint;k++) {
2355           if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
2356             j++;
2357           }
2358         }
2359         /* found more than one primal dof on the cc */
2360         if (j > 1) {
2361           PetscBTSet(qr_needed_idx,i);
2362           qr_needed = PETSC_TRUE;
2363         }
2364       } else {
2365         PetscBTSet(qr_needed_idx,i);
2366         qr_needed = PETSC_TRUE;
2367       }
2368     }
2369   }
2370   /* free workspace */
2371   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2372 
2373   /* permute indices in order to have a sorted set of vertices */
2374   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
2375 
2376   /* nonzero structure of constraint matrix */
2377   ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr);
2378   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
2379   j=total_primal_vertices;
2380   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2381     if (!PetscBTLookup(change_basis,i)) {
2382       nnz[j]=temp_indices[i+1]-temp_indices[i];
2383       j++;
2384     }
2385   }
2386   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
2387   ierr = PetscFree(nnz);CHKERRQ(ierr);
2388   /* set values in constraint matrix */
2389   for (i=0;i<total_primal_vertices;i++) {
2390     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2391   }
2392   total_counts = total_primal_vertices;
2393   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2394     if (!PetscBTLookup(change_basis,i)) {
2395       size_of_constraint=temp_indices[i+1]-temp_indices[i];
2396       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
2397       total_counts++;
2398     }
2399   }
2400   /* assembling */
2401   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2402   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2403   /*
2404   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2405   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
2406   */
2407   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
2408   if (pcbddc->use_change_of_basis) {
2409     /* dual and primal dofs on a single cc */
2410     PetscInt     dual_dofs,primal_dofs;
2411     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
2412     PetscInt     primal_counter;
2413     /* working stuff for GEQRF */
2414     PetscScalar  *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t;
2415     PetscBLASInt lqr_work;
2416     /* working stuff for UNGQR */
2417     PetscScalar  *gqr_work,lgqr_work_t;
2418     PetscBLASInt lgqr_work;
2419     /* working stuff for TRTRS */
2420     PetscScalar  *trs_rhs;
2421     PetscBLASInt Blas_NRHS;
2422     /* pointers for values insertion into change of basis matrix */
2423     PetscInt     *start_rows,*start_cols;
2424     PetscScalar  *start_vals;
2425     /* working stuff for values insertion */
2426     PetscBT      is_primal;
2427     /* matrix sizes */
2428     PetscInt     global_size,local_size;
2429     /* temporary change of basis */
2430     Mat          localChangeOfBasisMatrix;
2431     /* extra space for debugging */
2432     PetscScalar  *dbg_work;
2433 
2434     /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
2435     ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr);
2436     ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2437     ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
2438     /* nonzeros for local mat */
2439     ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr);
2440     for (i=0;i<pcis->n;i++) nnz[i]=1;
2441     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
2442       if (PetscBTLookup(change_basis,i)) {
2443         size_of_constraint = temp_indices[i+1]-temp_indices[i];
2444         if (PetscBTLookup(qr_needed_idx,i)) {
2445           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2446         } else {
2447           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint[temp_indices[i]+j]] = 2;
2448           /* get local primal index on the cc */
2449           j = 0;
2450           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
2451           nnz[temp_indices_to_constraint[temp_indices[i]+j]] = size_of_constraint;
2452         }
2453       }
2454     }
2455     ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
2456     ierr = PetscFree(nnz);CHKERRQ(ierr);
2457     /* Set initial identity in the matrix */
2458     for (i=0;i<pcis->n;i++) {
2459       ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
2460     }
2461 
2462     if (pcbddc->dbg_flag) {
2463       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
2464       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2465     }
2466 
2467 
2468     /* Now we loop on the constraints which need a change of basis */
2469     /*
2470        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
2471        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
2472 
2473        Basic blocks of change of basis matrix T computed by
2474 
2475           - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
2476 
2477             | 1        0   ...        0         s_1/S |
2478             | 0        1   ...        0         s_2/S |
2479             |              ...                        |
2480             | 0        ...            1     s_{n-1}/S |
2481             | -s_1/s_n ...    -s_{n-1}/s_n      s_n/S |
2482 
2483             with S = \sum_{i=1}^n s_i^2
2484             NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
2485                   in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
2486 
2487           - QR decomposition of constraints otherwise
2488     */
2489     if (qr_needed) {
2490       /* space to store Q */
2491       ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr);
2492       /* first we issue queries for optimal work */
2493       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2494       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
2495       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2496       lqr_work = -1;
2497       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
2498       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
2499       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
2500       ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr);
2501       lgqr_work = -1;
2502       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
2503       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
2504       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
2505       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2506       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
2507       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
2508       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
2509       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
2510       ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr);
2511       /* array to store scaling factors for reflectors */
2512       ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr);
2513       /* array to store rhs and solution of triangular solver */
2514       ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr);
2515       /* allocating workspace for check */
2516       if (pcbddc->dbg_flag) {
2517         ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr);
2518       }
2519     }
2520     /* array to store whether a node is primal or not */
2521     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
2522     ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr);
2523     ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
2524     if (i != total_primal_vertices) {
2525       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
2526     }
2527     for (i=0;i<total_primal_vertices;i++) {
2528       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
2529     }
2530     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
2531 
2532     /* loop on constraints and see whether or not they need a change of basis and compute it */
2533     /* -> using implicit ordering contained in temp_indices data */
2534     total_counts = pcbddc->n_vertices;
2535     primal_counter = total_counts;
2536     while (total_counts<pcbddc->local_primal_size) {
2537       primal_dofs = 1;
2538       if (PetscBTLookup(change_basis,total_counts)) {
2539         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
2540         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint[temp_indices[total_counts]] == temp_indices_to_constraint[temp_indices[total_counts+primal_dofs]]) {
2541           primal_dofs++;
2542         }
2543         /* get constraint info */
2544         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
2545         dual_dofs = size_of_constraint-primal_dofs;
2546 
2547         if (pcbddc->dbg_flag) {
2548           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
2549         }
2550 
2551         if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
2552 
2553           /* copy quadrature constraints for change of basis check */
2554           if (pcbddc->dbg_flag) {
2555             ierr = PetscMemcpy(dbg_work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2556           }
2557           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
2558           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2559 
2560           /* compute QR decomposition of constraints */
2561           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2562           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2563           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2564           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2565           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
2566           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
2567           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2568 
2569           /* explictly compute R^-T */
2570           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
2571           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
2572           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2573           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
2574           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2575           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2576           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2577           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
2578           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
2579           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2580 
2581           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
2582           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2583           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2584           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2585           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2586           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2587           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
2588           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
2589           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2590 
2591           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
2592              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
2593              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
2594           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
2595           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
2596           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
2597           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2598           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
2599           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
2600           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2601           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
2602           ierr = PetscFPTrapPop();CHKERRQ(ierr);
2603           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
2604 
2605           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
2606           start_rows = &temp_indices_to_constraint[temp_indices[total_counts]];
2607           /* insert cols for primal dofs */
2608           for (j=0;j<primal_dofs;j++) {
2609             start_vals = &qr_basis[j*size_of_constraint];
2610             start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
2611             ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2612           }
2613           /* insert cols for dual dofs */
2614           for (j=0,k=0;j<dual_dofs;k++) {
2615             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
2616               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2617               start_cols = &temp_indices_to_constraint[temp_indices[total_counts]+k];
2618               ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2619               j++;
2620             }
2621           }
2622 
2623           /* check change of basis */
2624           if (pcbddc->dbg_flag) {
2625             PetscInt   ii,jj;
2626             PetscBool valid_qr=PETSC_TRUE;
2627             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2628             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2629             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2630             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2631             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2632             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2633             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2634             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
2635             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2636             for (jj=0;jj<size_of_constraint;jj++) {
2637               for (ii=0;ii<primal_dofs;ii++) {
2638                 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2639                 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2640               }
2641             }
2642             if (!valid_qr) {
2643               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr);
2644               for (jj=0;jj<size_of_constraint;jj++) {
2645                 for (ii=0;ii<primal_dofs;ii++) {
2646                   if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2647                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2648                   }
2649                   if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2650                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2651                   }
2652                 }
2653               }
2654             } else {
2655               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr);
2656             }
2657           }
2658         } else { /* simple transformation block */
2659           PetscInt    row,col;
2660           PetscScalar val,norm;
2661 
2662           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2663           PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one,temp_quadrature_constraint+temp_indices[total_counts],&Blas_one));
2664           for (j=0;j<size_of_constraint;j++) {
2665             PetscInt row_B = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2666             row = temp_indices_to_constraint[temp_indices[total_counts]+j];
2667             if (!PetscBTLookup(is_primal,row_B)) {
2668               col = temp_indices_to_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2669               ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2670               ierr = MatSetValue(localChangeOfBasisMatrix,row,col,temp_quadrature_constraint[temp_indices[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr);
2671             } else {
2672               for (k=0;k<size_of_constraint;k++) {
2673                 col = temp_indices_to_constraint[temp_indices[total_counts]+k];
2674                 if (row != col) {
2675                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2676                 } else {
2677                   val = temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]]/norm;
2678                 }
2679                 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2680               }
2681             }
2682           }
2683           if (pcbddc->dbg_flag) {
2684             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr);
2685           }
2686         }
2687         /* increment primal counter */
2688         primal_counter += primal_dofs;
2689       } else {
2690         if (pcbddc->dbg_flag) {
2691           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2692         }
2693       }
2694       /* increment constraint counter total_counts */
2695       total_counts += primal_dofs;
2696     }
2697 
2698     /* free workspace */
2699     if (qr_needed) {
2700       if (pcbddc->dbg_flag) {
2701         ierr = PetscFree(dbg_work);CHKERRQ(ierr);
2702       }
2703       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2704       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2705       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2706       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2707       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2708     }
2709     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2710     ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711     ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712 
2713     /* assembling of global change of variable */
2714     {
2715       Mat      tmat;
2716       PetscInt bs;
2717 
2718       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2719       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2720       ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr);
2721       ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr);
2722       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2723       ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr);
2724       ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr);
2725       ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr);
2726       ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2727       ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr);
2728       ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2729       ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2730       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2731       ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr);
2732       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2733       ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2734       ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr);
2735       ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr);
2736     }
2737     /* check */
2738     if (pcbddc->dbg_flag) {
2739       PetscReal error;
2740       Vec       x,x_change;
2741 
2742       ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr);
2743       ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr);
2744       ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
2745       ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr);
2746       ierr = VecScatterBegin(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2747       ierr = VecScatterEnd(matis->ctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2748       ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
2749       ierr = VecScatterBegin(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2750       ierr = VecScatterEnd(matis->ctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2751       ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr);
2752       ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr);
2753       ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr);
2754       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2755       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Error global vs local change: %1.6e\n",error);CHKERRQ(ierr);
2756       ierr = VecDestroy(&x);CHKERRQ(ierr);
2757       ierr = VecDestroy(&x_change);CHKERRQ(ierr);
2758     }
2759 
2760     /* adapt sub_schurs computed (if any) */
2761     if (pcbddc->use_deluxe_scaling) {
2762       PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
2763       if (sub_schurs->S_Ej_all) {
2764         Mat S_new,tmat;
2765         IS is_all_N;
2766 
2767         ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr);
2768         ierr = MatGetSubMatrixUnsorted(localChangeOfBasisMatrix,is_all_N,is_all_N,&tmat);CHKERRQ(ierr);
2769         ierr = ISDestroy(&is_all_N);CHKERRQ(ierr);
2770         ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2771         ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr);
2772         ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2773         sub_schurs->S_Ej_all = S_new;
2774         ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2775         if (sub_schurs->sum_S_Ej_all) {
2776           ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr);
2777           ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr);
2778           ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr);
2779           sub_schurs->sum_S_Ej_all = S_new;
2780           ierr = MatDestroy(&S_new);CHKERRQ(ierr);
2781         }
2782         ierr = MatDestroy(&tmat);CHKERRQ(ierr);
2783       }
2784     }
2785     ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr);
2786   } else if (pcbddc->user_ChangeOfBasisMatrix) {
2787     ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2788     pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
2789   }
2790 
2791   /* set up change of basis context */
2792   if (pcbddc->ChangeOfBasisMatrix) {
2793     PCBDDCChange_ctx change_ctx;
2794 
2795     if (!pcbddc->new_global_mat) {
2796       PetscInt global_size,local_size;
2797 
2798       ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr);
2799       ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr);
2800       ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->new_global_mat);CHKERRQ(ierr);
2801       ierr = MatSetSizes(pcbddc->new_global_mat,local_size,local_size,global_size,global_size);CHKERRQ(ierr);
2802       ierr = MatSetType(pcbddc->new_global_mat,MATSHELL);CHKERRQ(ierr);
2803       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT,(void (*)(void))PCBDDCMatMult_Private);CHKERRQ(ierr);
2804       ierr = MatShellSetOperation(pcbddc->new_global_mat,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCMatMultTranspose_Private);CHKERRQ(ierr);
2805       ierr = PetscNew(&change_ctx);CHKERRQ(ierr);
2806       ierr = MatShellSetContext(pcbddc->new_global_mat,change_ctx);CHKERRQ(ierr);
2807     } else {
2808       ierr = MatShellGetContext(pcbddc->new_global_mat,&change_ctx);CHKERRQ(ierr);
2809       ierr = MatDestroy(&change_ctx->global_change);CHKERRQ(ierr);
2810       ierr = VecDestroyVecs(2,&change_ctx->work);CHKERRQ(ierr);
2811     }
2812     if (!pcbddc->user_ChangeOfBasisMatrix) {
2813       ierr = PetscObjectReference((PetscObject)pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
2814       change_ctx->global_change = pcbddc->ChangeOfBasisMatrix;
2815     } else {
2816       ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr);
2817       change_ctx->global_change = pcbddc->user_ChangeOfBasisMatrix;
2818     }
2819     ierr = VecDuplicateVecs(pcis->vec1_global,2,&change_ctx->work);CHKERRQ(ierr);
2820     ierr = MatSetUp(pcbddc->new_global_mat);CHKERRQ(ierr);
2821     ierr = MatAssemblyBegin(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2822     ierr = MatAssemblyEnd(pcbddc->new_global_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2823   }
2824 
2825   /* get indices in local ordering for vertices and constraints */
2826   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2827     ierr = PetscMalloc1(olocal_primal_size,&oprimal_indices_local_idxs);CHKERRQ(ierr);
2828     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2829   }
2830   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2831   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2832   ierr = PetscMalloc1(pcbddc->local_primal_size,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2833   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2834   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2835   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2836   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2837   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2838   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2839   /* set quantities in PCBDDC data struct */
2840   pcbddc->n_actual_vertices = i;
2841   /* check if a new primal space has been introduced */
2842   pcbddc->new_primal_space_local = PETSC_TRUE;
2843   if (olocal_primal_size == pcbddc->local_primal_size) {
2844     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2845     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2846     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2847   }
2848   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2849   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2850 
2851   /* flush dbg viewer */
2852   if (pcbddc->dbg_flag) {
2853     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2854   }
2855 
2856   /* free workspace */
2857   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2858   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2859   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2860   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2861   if (!pcbddc->adaptive_selection) {
2862     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2863     ierr = PetscFree3(temp_quadrature_constraint,temp_indices_to_constraint,temp_indices_to_constraint_B);CHKERRQ(ierr);
2864   } else {
2865     ierr = PetscFree4(pcbddc->adaptive_constraints_n,
2866                       pcbddc->adaptive_constraints_ptrs,
2867                       pcbddc->adaptive_constraints_idxs,
2868                       pcbddc->adaptive_constraints_data);CHKERRQ(ierr);
2869     ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2870   }
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2876 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2877 {
2878   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2879   PC_IS       *pcis = (PC_IS*)pc->data;
2880   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2881   PetscInt    ierr,i,vertex_size,N;
2882   PetscViewer viewer=pcbddc->dbg_viewer;
2883 
2884   PetscFunctionBegin;
2885   /* Reset previously computed graph */
2886   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2887   /* Init local Graph struct */
2888   ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr);
2889   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping,N);CHKERRQ(ierr);
2890 
2891   /* Check validity of the csr graph passed in by the user */
2892   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2893     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2894   }
2895 
2896   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2897   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2898     PetscInt  *xadj,*adjncy;
2899     PetscInt  nvtxs;
2900     PetscBool flg_row=PETSC_FALSE;
2901 
2902     if (pcbddc->use_local_adj) {
2903 
2904       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2905       if (flg_row) {
2906         ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2907         pcbddc->computed_rowadj = PETSC_TRUE;
2908       }
2909       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2910     } else if (pcbddc->current_level && pcis->n_B) { /* just compute subdomain's connected components for coarser levels when the local boundary is not empty */
2911       IS                     is_dummy;
2912       ISLocalToGlobalMapping l2gmap_dummy;
2913       PetscInt               j,sum;
2914       PetscInt               *cxadj,*cadjncy;
2915       const PetscInt         *idxs;
2916       PCBDDCGraph            graph;
2917       PetscBT                is_on_boundary;
2918 
2919       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n,0,1,&is_dummy);CHKERRQ(ierr);
2920       ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr);
2921       ierr = ISDestroy(&is_dummy);CHKERRQ(ierr);
2922       ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
2923       ierr = PCBDDCGraphInit(graph,l2gmap_dummy,pcis->n);CHKERRQ(ierr);
2924       ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr);
2925       ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2926       if (flg_row) {
2927         graph->xadj = xadj;
2928         graph->adjncy = adjncy;
2929       }
2930       ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr);
2931       ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
2932       ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr);
2933 
2934       if (pcbddc->dbg_flag) {
2935         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] Found %d subdomains (local size %d)\n",PetscGlobalRank,graph->ncc,pcis->n);CHKERRQ(ierr);
2936         for (i=0;i<graph->ncc;i++) {
2937           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"[%d] %d cc size %d\n",PetscGlobalRank,i,graph->cptr[i+1]-graph->cptr[i]);CHKERRQ(ierr);
2938         }
2939       }
2940 
2941       ierr = PetscBTCreate(pcis->n,&is_on_boundary);CHKERRQ(ierr);
2942       ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2943       for (i=0;i<pcis->n_B;i++) {
2944         ierr = PetscBTSet(is_on_boundary,idxs[i]);CHKERRQ(ierr);
2945       }
2946       ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr);
2947 
2948       ierr = PetscCalloc1(pcis->n+1,&cxadj);CHKERRQ(ierr);
2949       sum = 0;
2950       for (i=0;i<graph->ncc;i++) {
2951         PetscInt sizecc = 0;
2952         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2953           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2954             sizecc++;
2955           }
2956         }
2957         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2958           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2959             cxadj[graph->queue[j]] = sizecc;
2960           }
2961         }
2962         sum += sizecc*sizecc;
2963       }
2964       ierr = PetscMalloc1(sum,&cadjncy);CHKERRQ(ierr);
2965       sum = 0;
2966       for (i=0;i<pcis->n;i++) {
2967         PetscInt temp = cxadj[i];
2968         cxadj[i] = sum;
2969         sum += temp;
2970       }
2971       cxadj[pcis->n] = sum;
2972       for (i=0;i<graph->ncc;i++) {
2973         for (j=graph->cptr[i];j<graph->cptr[i+1];j++) {
2974           if (PetscBTLookup(is_on_boundary,graph->queue[j])) {
2975             PetscInt k,sizecc = 0;
2976             for (k=graph->cptr[i];k<graph->cptr[i+1];k++) {
2977               if (PetscBTLookup(is_on_boundary,graph->queue[k])) {
2978                 cadjncy[cxadj[graph->queue[j]]+sizecc]=graph->queue[k];
2979                 sizecc++;
2980               }
2981             }
2982           }
2983         }
2984       }
2985       if (sum) {
2986         ierr = PCBDDCSetLocalAdjacencyGraph(pc,pcis->n,cxadj,cadjncy,PETSC_OWN_POINTER);CHKERRQ(ierr);
2987       } else {
2988         ierr = PetscFree(cxadj);CHKERRQ(ierr);
2989         ierr = PetscFree(cadjncy);CHKERRQ(ierr);
2990       }
2991       graph->xadj = 0;
2992       graph->adjncy = 0;
2993       ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
2994       ierr = PetscBTDestroy(&is_on_boundary);CHKERRQ(ierr);
2995     }
2996   }
2997   if (pcbddc->dbg_flag) {
2998     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2999   }
3000 
3001   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
3002   vertex_size = 1;
3003   if (pcbddc->user_provided_isfordofs) {
3004     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
3005       ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3006       for (i=0;i<pcbddc->n_ISForDofs;i++) {
3007         ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3008         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
3009       }
3010       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
3011       pcbddc->n_ISForDofs = 0;
3012       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
3013     }
3014     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
3015     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
3016   } else {
3017     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
3018       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
3019       ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
3020       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3021         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
3022       }
3023     }
3024   }
3025 
3026   /* Setup of Graph */
3027   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
3028     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
3029   }
3030   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
3031     ierr = PCBDDCGlobalToLocal(matis->ctx,pcis->vec1_global,pcis->vec1_N,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
3032   }
3033   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);CHKERRQ(ierr);
3034 
3035   /* Graph's connected components analysis */
3036   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
3037 
3038   /* print some info to stdout */
3039   if (pcbddc->dbg_flag) {
3040     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);CHKERRQ(ierr);
3041   }
3042 
3043   /* mark topography has done */
3044   pcbddc->recompute_topography = PETSC_FALSE;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 #undef __FUNCT__
3049 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
3050 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
3051 {
3052   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3053   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
3054   PetscErrorCode ierr;
3055 
3056   PetscFunctionBegin;
3057   n = 0;
3058   vertices = 0;
3059   if (pcbddc->ConstraintMatrix) {
3060     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
3061     for (i=0;i<local_primal_size;i++) {
3062       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3063       if (size_of_constraint == 1) n++;
3064       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3065     }
3066     if (vertices_idx) {
3067       ierr = PetscMalloc1(n,&vertices);CHKERRQ(ierr);
3068       n = 0;
3069       for (i=0;i<local_primal_size;i++) {
3070         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3071         if (size_of_constraint == 1) {
3072           vertices[n++]=row_cmat_indices[0];
3073         }
3074         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3075       }
3076     }
3077   }
3078   *n_vertices = n;
3079   if (vertices_idx) *vertices_idx = vertices;
3080   PetscFunctionReturn(0);
3081 }
3082 
3083 #undef __FUNCT__
3084 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
3085 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
3086 {
3087   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
3088   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
3089   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
3090   PetscBT        touched;
3091   PetscErrorCode ierr;
3092 
3093     /* This function assumes that the number of local constraints per connected component
3094        is not greater than the number of nodes defined for the connected component
3095        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
3096   PetscFunctionBegin;
3097   n = 0;
3098   constraints_index = 0;
3099   if (pcbddc->ConstraintMatrix) {
3100     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
3101     max_size_of_constraint = 0;
3102     for (i=0;i<local_primal_size;i++) {
3103       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3104       if (size_of_constraint > 1) {
3105         n++;
3106       }
3107       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
3108       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
3109     }
3110     if (constraints_idx) {
3111       ierr = PetscMalloc1(n,&constraints_index);CHKERRQ(ierr);
3112       ierr = PetscMalloc1(max_size_of_constraint,&row_cmat_global_indices);CHKERRQ(ierr);
3113       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
3114       n = 0;
3115       for (i=0;i<local_primal_size;i++) {
3116         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3117         if (size_of_constraint > 1) {
3118           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
3119           /* find first untouched local node */
3120           j = 0;
3121           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
3122           min_index = row_cmat_global_indices[j];
3123           min_loc = j;
3124           /* search the minimum among nodes not yet touched on the connected component
3125              since there can be more than one constraint on a single cc */
3126           for (j=1;j<size_of_constraint;j++) {
3127             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
3128               min_index = row_cmat_global_indices[j];
3129               min_loc = j;
3130             }
3131           }
3132           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
3133           constraints_index[n++] = row_cmat_indices[min_loc];
3134         }
3135         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
3136       }
3137       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
3138       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
3139     }
3140   }
3141   *n_constraints = n;
3142   if (constraints_idx) *constraints_idx = constraints_index;
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 #undef __FUNCT__
3147 #define __FUNCT__ "PCBDDCSubsetNumbering"
3148 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
3149 {
3150   Vec            local_vec,global_vec;
3151   IS             seqis,paris;
3152   VecScatter     scatter_ctx;
3153   PetscScalar    *array;
3154   PetscInt       *temp_global_dofs;
3155   PetscScalar    globalsum;
3156   PetscInt       i,j,s;
3157   PetscInt       nlocals,first_index,old_index,max_local,max_global;
3158   PetscMPIInt    rank_prec_comm,size_prec_comm;
3159   PetscInt       *dof_sizes,*dof_displs;
3160   PetscBool      first_found;
3161   PetscErrorCode ierr;
3162 
3163   PetscFunctionBegin;
3164   /* mpi buffers */
3165   ierr = MPI_Comm_size(comm,&size_prec_comm);CHKERRQ(ierr);
3166   ierr = MPI_Comm_rank(comm,&rank_prec_comm);CHKERRQ(ierr);
3167   j = ( !rank_prec_comm ? size_prec_comm : 0);
3168   ierr = PetscMalloc2(j,&dof_sizes,j,&dof_displs);CHKERRQ(ierr);
3169   /* get maximum size of subset */
3170   ierr = PetscMalloc1(n_local_dofs,&temp_global_dofs);CHKERRQ(ierr);
3171   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
3172   max_local = 0;
3173   for (i=0;i<n_local_dofs;i++) {
3174     if (max_local < temp_global_dofs[i] ) {
3175       max_local = temp_global_dofs[i];
3176     }
3177   }
3178   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);CHKERRQ(ierr);
3179   max_global++;
3180   max_local = 0;
3181   for (i=0;i<n_local_dofs;i++) {
3182     if (max_local < local_dofs[i] ) {
3183       max_local = local_dofs[i];
3184     }
3185   }
3186   max_local++;
3187   /* allocate workspace */
3188   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
3189   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
3190   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
3191   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
3192   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
3193   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
3194   /* create scatter */
3195   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
3196   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
3197   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
3198   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
3199   ierr = ISDestroy(&paris);CHKERRQ(ierr);
3200   /* init array */
3201   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
3202   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3203   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
3204   if (local_dofs_mult) {
3205     for (i=0;i<n_local_dofs;i++) {
3206       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
3207     }
3208   } else {
3209     for (i=0;i<n_local_dofs;i++) {
3210       array[local_dofs[i]]=1.0;
3211     }
3212   }
3213   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
3214   /* scatter into global vec and get total number of global dofs */
3215   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3216   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3217   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
3218   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
3219   /* Fill global_vec with cumulative function for global numbering */
3220   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
3221   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
3222   nlocals = 0;
3223   first_index = -1;
3224   first_found = PETSC_FALSE;
3225   for (i=0;i<s;i++) {
3226     if (!first_found && PetscRealPart(array[i]) > 0.1) {
3227       first_found = PETSC_TRUE;
3228       first_index = i;
3229     }
3230     nlocals += (PetscInt)PetscRealPart(array[i]);
3231   }
3232   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3233   if (!rank_prec_comm) {
3234     dof_displs[0]=0;
3235     for (i=1;i<size_prec_comm;i++) {
3236       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
3237     }
3238   }
3239   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
3240   if (first_found) {
3241     array[first_index] += (PetscScalar)nlocals;
3242     old_index = first_index;
3243     for (i=first_index+1;i<s;i++) {
3244       if (PetscRealPart(array[i]) > 0.1) {
3245         array[i] += array[old_index];
3246         old_index = i;
3247       }
3248     }
3249   }
3250   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
3251   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
3252   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3253   ierr = VecScatterEnd(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3254   /* get global ordering of local dofs */
3255   ierr = VecGetArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3256   if (local_dofs_mult) {
3257     for (i=0;i<n_local_dofs;i++) {
3258       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
3259     }
3260   } else {
3261     for (i=0;i<n_local_dofs;i++) {
3262       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
3263     }
3264   }
3265   ierr = VecRestoreArrayRead(local_vec,(const PetscScalar**)&array);CHKERRQ(ierr);
3266   /* free workspace */
3267   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
3268   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
3269   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
3270   ierr = PetscFree2(dof_sizes,dof_displs);CHKERRQ(ierr);
3271   /* return pointer to global ordering of local dofs */
3272   *global_numbering_subset = temp_global_dofs;
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
3278 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
3279 {
3280   PetscInt       i,j;
3281   PetscScalar    *alphas;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   /* this implements stabilized Gram-Schmidt */
3286   ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr);
3287   for (i=0;i<n;i++) {
3288     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
3289     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
3290     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
3291   }
3292   ierr = PetscFree(alphas);CHKERRQ(ierr);
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatISGetSubassemblingPattern"
3298 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt n_subdomains, PetscBool contiguous, IS* is_sends)
3299 {
3300   Mat             subdomain_adj;
3301   IS              new_ranks,ranks_send_to;
3302   MatPartitioning partitioner;
3303   Mat_IS          *matis;
3304   PetscInt        n_neighs,*neighs,*n_shared,**shared;
3305   PetscInt        prank;
3306   PetscMPIInt     size,rank,color;
3307   PetscInt        *xadj,*adjncy,*oldranks;
3308   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
3309   PetscInt        i,local_size,threshold=0;
3310   PetscErrorCode  ierr;
3311   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
3312   PetscSubcomm    subcomm;
3313 
3314   PetscFunctionBegin;
3315   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
3316   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
3317   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
3318 
3319   /* Get info on mapping */
3320   matis = (Mat_IS*)(mat->data);
3321   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
3322   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3323 
3324   /* build local CSR graph of subdomains' connectivity */
3325   ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr);
3326   xadj[0] = 0;
3327   xadj[1] = PetscMax(n_neighs-1,0);
3328   ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr);
3329   ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr);
3330 
3331   if (threshold) {
3332     PetscInt xadj_count = 0;
3333     for (i=1;i<n_neighs;i++) {
3334       if (n_shared[i] > threshold) {
3335         adjncy[xadj_count] = neighs[i];
3336         adjncy_wgt[xadj_count] = n_shared[i];
3337         xadj_count++;
3338       }
3339     }
3340     xadj[1] = xadj_count;
3341   } else {
3342     if (xadj[1]) {
3343       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
3344       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
3345     }
3346   }
3347   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
3348   if (use_square) {
3349     for (i=0;i<xadj[1];i++) {
3350       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
3351     }
3352   }
3353   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3354 
3355   ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr);
3356 
3357   /*
3358     Restrict work on active processes only.
3359   */
3360   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
3361   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
3362   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
3363   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
3364   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3365   if (color) {
3366     ierr = PetscFree(xadj);CHKERRQ(ierr);
3367     ierr = PetscFree(adjncy);CHKERRQ(ierr);
3368     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
3369   } else {
3370     PetscInt coarsening_ratio;
3371     ierr = MPI_Comm_size(PetscSubcommChild(subcomm),&size);CHKERRQ(ierr);
3372     ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr);
3373     prank = rank;
3374     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,PetscSubcommChild(subcomm));CHKERRQ(ierr);
3375     /*
3376     for (i=0;i<size;i++) {
3377       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
3378     }
3379     */
3380     for (i=0;i<xadj[1];i++) {
3381       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
3382     }
3383     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
3384     ierr = MatCreateMPIAdj(PetscSubcommChild(subcomm),1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
3385     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
3386 
3387     /* Partition */
3388     ierr = MatPartitioningCreate(PetscSubcommChild(subcomm),&partitioner);CHKERRQ(ierr);
3389     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
3390     if (use_vwgt) {
3391       ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr);
3392       v_wgt[0] = local_size;
3393       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
3394     }
3395     n_subdomains = PetscMin((PetscInt)size,n_subdomains);
3396     coarsening_ratio = size/n_subdomains;
3397     ierr = MatPartitioningSetNParts(partitioner,n_subdomains);CHKERRQ(ierr);
3398     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
3399     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
3400     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
3401 
3402     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3403     if (contiguous) {
3404       ranks_send_to_idx[0] = oldranks[is_indices[0]]; /* contiguos set of processes */
3405     } else {
3406       ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
3407     }
3408     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
3409     /* clean up */
3410     ierr = PetscFree(oldranks);CHKERRQ(ierr);
3411     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
3412     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
3413     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
3414   }
3415   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3416 
3417   /* assemble parallel IS for sends */
3418   i = 1;
3419   if (color) i=0;
3420   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
3421 
3422   /* get back IS */
3423   *is_sends = ranks_send_to;
3424   PetscFunctionReturn(0);
3425 }
3426 
3427 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
3428 
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatISSubassemble"
3431 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, MatReuse reuse, Mat *mat_n, PetscInt nis, IS isarray[])
3432 {
3433   Mat                    local_mat;
3434   Mat_IS                 *matis;
3435   IS                     is_sends_internal;
3436   PetscInt               rows,cols,new_local_rows;
3437   PetscInt               i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals;
3438   PetscBool              ismatis,isdense,newisdense,destroy_mat;
3439   ISLocalToGlobalMapping l2gmap;
3440   PetscInt*              l2gmap_indices;
3441   const PetscInt*        is_indices;
3442   MatType                new_local_type;
3443   /* buffers */
3444   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
3445   PetscInt               *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
3446   PetscInt               *recv_buffer_idxs_local;
3447   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
3448   /* MPI */
3449   MPI_Comm               comm,comm_n;
3450   PetscSubcomm           subcomm;
3451   PetscMPIInt            n_sends,n_recvs,commsize;
3452   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
3453   PetscMPIInt            *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
3454   PetscMPIInt            len,tag_idxs,tag_idxs_is,tag_vals,source_dest;
3455   MPI_Request            *send_req_idxs,*send_req_idxs_is,*send_req_vals;
3456   MPI_Request            *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals;
3457   PetscErrorCode         ierr;
3458 
3459   PetscFunctionBegin;
3460   /* TODO: add missing checks */
3461   PetscValidLogicalCollectiveInt(mat,n_subdomains,3);
3462   PetscValidLogicalCollectiveBool(mat,restrict_comm,4);
3463   PetscValidLogicalCollectiveEnum(mat,reuse,5);
3464   PetscValidLogicalCollectiveInt(mat,nis,7);
3465   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
3466   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",__FUNCT__);
3467   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3468   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
3469   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
3470   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
3471   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
3472   if (reuse == MAT_REUSE_MATRIX && *mat_n) {
3473     PetscInt mrows,mcols,mnrows,mncols;
3474     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
3475     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
3476     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
3477     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
3478     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
3479     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
3480   }
3481   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
3482   PetscValidLogicalCollectiveInt(mat,bs,0);
3483   /* prepare IS for sending if not provided */
3484   if (!is_sends) {
3485     PetscBool pcontig = PETSC_TRUE;
3486     if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
3487     ierr = MatISGetSubassemblingPattern(mat,n_subdomains,pcontig,&is_sends_internal);CHKERRQ(ierr);
3488   } else {
3489     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
3490     is_sends_internal = is_sends;
3491   }
3492 
3493   /* get pointer of MATIS data */
3494   matis = (Mat_IS*)mat->data;
3495 
3496   /* get comm */
3497   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3498 
3499   /* compute number of sends */
3500   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
3501   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
3502 
3503   /* compute number of receives */
3504   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
3505   ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr);
3506   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
3507   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3508   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
3509   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
3510   ierr = PetscFree(iflags);CHKERRQ(ierr);
3511 
3512   /* restrict comm if requested */
3513   subcomm = 0;
3514   destroy_mat = PETSC_FALSE;
3515   if (restrict_comm) {
3516     PetscMPIInt color,subcommsize;
3517 
3518     color = 0;
3519     if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm */
3520     ierr = MPI_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
3521     subcommsize = commsize - subcommsize;
3522     /* check if reuse has been requested */
3523     if (reuse == MAT_REUSE_MATRIX) {
3524       if (*mat_n) {
3525         PetscMPIInt subcommsize2;
3526         ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr);
3527         if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
3528         comm_n = PetscObjectComm((PetscObject)*mat_n);
3529       } else {
3530         comm_n = PETSC_COMM_SELF;
3531       }
3532     } else { /* MAT_INITIAL_MATRIX */
3533       PetscMPIInt rank;
3534 
3535       ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3536       ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr);
3537       ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr);
3538       ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
3539       comm_n = PetscSubcommChild(subcomm);
3540     }
3541     /* flag to destroy *mat_n if not significative */
3542     if (color) destroy_mat = PETSC_TRUE;
3543   } else {
3544     comm_n = comm;
3545   }
3546 
3547   /* prepare send/receive buffers */
3548   ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr);
3549   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
3550   ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr);
3551   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
3552   if (nis) {
3553     ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr);
3554   }
3555 
3556   /* Get data from local matrices */
3557   if (!isdense) {
3558     SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
3559     /* TODO: See below some guidelines on how to prepare the local buffers */
3560     /*
3561        send_buffer_vals should contain the raw values of the local matrix
3562        send_buffer_idxs should contain:
3563        - MatType_PRIVATE type
3564        - PetscInt        size_of_l2gmap
3565        - PetscInt        global_row_indices[size_of_l2gmap]
3566        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
3567     */
3568   } else {
3569     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3570     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
3571     ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr);
3572     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
3573     send_buffer_idxs[1] = i;
3574     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3575     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
3576     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
3577     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
3578     for (i=0;i<n_sends;i++) {
3579       ilengths_vals[is_indices[i]] = len*len;
3580       ilengths_idxs[is_indices[i]] = len+2;
3581     }
3582   }
3583   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
3584   /* additional is (if any) */
3585   if (nis) {
3586     PetscMPIInt psum;
3587     PetscInt j;
3588     for (j=0,psum=0;j<nis;j++) {
3589       PetscInt plen;
3590       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3591       ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr);
3592       psum += len+1; /* indices + lenght */
3593     }
3594     ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr);
3595     for (j=0,psum=0;j<nis;j++) {
3596       PetscInt plen;
3597       const PetscInt *is_array_idxs;
3598       ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr);
3599       send_buffer_idxs_is[psum] = plen;
3600       ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3601       ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr);
3602       ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr);
3603       psum += plen+1; /* indices + lenght */
3604     }
3605     for (i=0;i<n_sends;i++) {
3606       ilengths_idxs_is[is_indices[i]] = psum;
3607     }
3608     ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr);
3609   }
3610 
3611   buf_size_idxs = 0;
3612   buf_size_vals = 0;
3613   buf_size_idxs_is = 0;
3614   for (i=0;i<n_recvs;i++) {
3615     buf_size_idxs += (PetscInt)olengths_idxs[i];
3616     buf_size_vals += (PetscInt)olengths_vals[i];
3617     if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
3618   }
3619   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr);
3620   ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr);
3621   ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr);
3622 
3623   /* get new tags for clean communications */
3624   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
3625   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
3626   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr);
3627 
3628   /* allocate for requests */
3629   ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr);
3630   ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr);
3631   ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr);
3632   ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr);
3633   ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr);
3634   ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr);
3635 
3636   /* communications */
3637   ptr_idxs = recv_buffer_idxs;
3638   ptr_vals = recv_buffer_vals;
3639   ptr_idxs_is = recv_buffer_idxs_is;
3640   for (i=0;i<n_recvs;i++) {
3641     source_dest = onodes[i];
3642     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
3643     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
3644     ptr_idxs += olengths_idxs[i];
3645     ptr_vals += olengths_vals[i];
3646     if (nis) {
3647       ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr);
3648       ptr_idxs_is += olengths_idxs_is[i];
3649     }
3650   }
3651   for (i=0;i<n_sends;i++) {
3652     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
3653     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3654     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3655     if (nis) {
3656       ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr);
3657     }
3658   }
3659   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3660   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3661 
3662   /* assemble new l2g map */
3663   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3664   ptr_idxs = recv_buffer_idxs;
3665   new_local_rows = 0;
3666   for (i=0;i<n_recvs;i++) {
3667     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3668     ptr_idxs += olengths_idxs[i];
3669   }
3670   ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr);
3671   ptr_idxs = recv_buffer_idxs;
3672   new_local_rows = 0;
3673   for (i=0;i<n_recvs;i++) {
3674     ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3675     new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3676     ptr_idxs += olengths_idxs[i];
3677   }
3678   ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr);
3679   ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3680   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3681 
3682   /* infer new local matrix type from received local matrices type */
3683   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3684   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3685   if (n_recvs) {
3686     MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3687     ptr_idxs = recv_buffer_idxs;
3688     for (i=0;i<n_recvs;i++) {
3689       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3690         new_local_type_private = MATAIJ_PRIVATE;
3691         break;
3692       }
3693       ptr_idxs += olengths_idxs[i];
3694     }
3695     switch (new_local_type_private) {
3696       case MATDENSE_PRIVATE:
3697         if (n_recvs>1) { /* subassembling of dense matrices does not give a dense matrix! */
3698           new_local_type = MATSEQAIJ;
3699           bs = 1;
3700         } else { /* if I receive only 1 dense matrix */
3701           new_local_type = MATSEQDENSE;
3702           bs = 1;
3703         }
3704         break;
3705       case MATAIJ_PRIVATE:
3706         new_local_type = MATSEQAIJ;
3707         bs = 1;
3708         break;
3709       case MATBAIJ_PRIVATE:
3710         new_local_type = MATSEQBAIJ;
3711         break;
3712       case MATSBAIJ_PRIVATE:
3713         new_local_type = MATSEQSBAIJ;
3714         break;
3715       default:
3716         SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,__FUNCT__);
3717         break;
3718     }
3719   } else { /* by default, new_local_type is seqdense */
3720     new_local_type = MATSEQDENSE;
3721     bs = 1;
3722   }
3723 
3724   /* create MATIS object if needed */
3725   if (reuse == MAT_INITIAL_MATRIX) {
3726     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3727     ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3728   } else {
3729     /* it also destroys the local matrices */
3730     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3731   }
3732   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3733   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3734 
3735   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3736 
3737   /* Global to local map of received indices */
3738   ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */
3739   ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr);
3740   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3741 
3742   /* restore attributes -> type of incoming data and its size */
3743   buf_size_idxs = 0;
3744   for (i=0;i<n_recvs;i++) {
3745     recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
3746     recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
3747     buf_size_idxs += (PetscInt)olengths_idxs[i];
3748   }
3749   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3750 
3751   /* set preallocation */
3752   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr);
3753   if (!newisdense) {
3754     PetscInt *new_local_nnz=0;
3755 
3756     ptr_vals = recv_buffer_vals;
3757     ptr_idxs = recv_buffer_idxs_local;
3758     if (n_recvs) {
3759       ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr);
3760     }
3761     for (i=0;i<n_recvs;i++) {
3762       PetscInt j;
3763       if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
3764         for (j=0;j<*(ptr_idxs+1);j++) {
3765           new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
3766         }
3767       } else {
3768         /* TODO */
3769       }
3770       ptr_idxs += olengths_idxs[i];
3771     }
3772     if (new_local_nnz) {
3773       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
3774       ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr);
3775       for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
3776       ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3777       for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
3778       ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr);
3779     } else {
3780       ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3781     }
3782     ierr = PetscFree(new_local_nnz);CHKERRQ(ierr);
3783   } else {
3784     ierr = MatSetUp(local_mat);CHKERRQ(ierr);
3785   }
3786 
3787   /* set values */
3788   ptr_vals = recv_buffer_vals;
3789   ptr_idxs = recv_buffer_idxs_local;
3790   for (i=0;i<n_recvs;i++) {
3791     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3792       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3793       ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3794       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3795       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3796       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3797     } else {
3798       /* TODO */
3799     }
3800     ptr_idxs += olengths_idxs[i];
3801     ptr_vals += olengths_vals[i];
3802   }
3803   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3804   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3805   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3806   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3807   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3808   ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr);
3809 
3810 #if 0
3811   if (!restrict_comm) { /* check */
3812     Vec       lvec,rvec;
3813     PetscReal infty_error;
3814 
3815     ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3816     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3817     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3818     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3819     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3820     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3821     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3822     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3823     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3824   }
3825 #endif
3826 
3827   /* assemble new additional is (if any) */
3828   if (nis) {
3829     PetscInt **temp_idxs,*count_is,j,psum;
3830 
3831     ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3832     ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr);
3833     ptr_idxs = recv_buffer_idxs_is;
3834     psum = 0;
3835     for (i=0;i<n_recvs;i++) {
3836       for (j=0;j<nis;j++) {
3837         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3838         count_is[j] += plen; /* increment counting of buffer for j-th IS */
3839         psum += plen;
3840         ptr_idxs += plen+1; /* shift pointer to received data */
3841       }
3842     }
3843     ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr);
3844     ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr);
3845     for (i=1;i<nis;i++) {
3846       temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
3847     }
3848     ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr);
3849     ptr_idxs = recv_buffer_idxs_is;
3850     for (i=0;i<n_recvs;i++) {
3851       for (j=0;j<nis;j++) {
3852         PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
3853         ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr);
3854         count_is[j] += plen; /* increment starting point of buffer for j-th IS */
3855         ptr_idxs += plen+1; /* shift pointer to received data */
3856       }
3857     }
3858     for (i=0;i<nis;i++) {
3859       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3860       ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr);
3861       ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
3862     }
3863     ierr = PetscFree(count_is);CHKERRQ(ierr);
3864     ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr);
3865     ierr = PetscFree(temp_idxs);CHKERRQ(ierr);
3866   }
3867   /* free workspace */
3868   ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr);
3869   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3870   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3871   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3872   if (isdense) {
3873     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3874     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3875   } else {
3876     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3877   }
3878   if (nis) {
3879     ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3880     ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr);
3881   }
3882   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3883   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3884   ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr);
3885   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3886   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3887   ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr);
3888   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3889   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3890   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3891   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3892   ierr = PetscFree(onodes);CHKERRQ(ierr);
3893   if (nis) {
3894     ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr);
3895     ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr);
3896     ierr = PetscFree(onodes_is);CHKERRQ(ierr);
3897   }
3898   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
3899   if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */
3900     ierr = MatDestroy(mat_n);CHKERRQ(ierr);
3901     for (i=0;i<nis;i++) {
3902       ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
3903     }
3904   }
3905   PetscFunctionReturn(0);
3906 }
3907 
3908 /* temporary hack into ksp private data structure */
3909 #include <petsc/private/kspimpl.h>
3910 
3911 #undef __FUNCT__
3912 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3913 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3914 {
3915   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3916   PC_IS                  *pcis = (PC_IS*)pc->data;
3917   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3918   MatNullSpace           CoarseNullSpace=NULL;
3919   ISLocalToGlobalMapping coarse_islg;
3920   IS                     coarse_is,*isarray;
3921   PetscInt               i,im_active=-1,active_procs=-1;
3922   PetscInt               nis,nisdofs,nisneu;
3923   PC                     pc_temp;
3924   PCType                 coarse_pc_type;
3925   KSPType                coarse_ksp_type;
3926   PetscBool              multilevel_requested,multilevel_allowed;
3927   PetscBool              isredundant,isbddc,isnn,coarse_reuse;
3928   Mat                    t_coarse_mat_is;
3929   PetscInt               void_procs,ncoarse_ml,ncoarse_ds,ncoarse;
3930   PetscMPIInt            all_procs;
3931   PetscBool              csin_ml,csin_ds,csin,csin_type_simple,redist;
3932   PetscBool              compute_vecs = PETSC_FALSE;
3933   PetscScalar            *array;
3934   PetscErrorCode         ierr;
3935 
3936   PetscFunctionBegin;
3937   /* Assign global numbering to coarse dofs */
3938   if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
3939     PetscInt ocoarse_size;
3940     compute_vecs = PETSC_TRUE;
3941     ocoarse_size = pcbddc->coarse_size;
3942     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3943     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3944     /* see if we can avoid some work */
3945     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3946       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3947         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3948         coarse_reuse = PETSC_FALSE;
3949       } else { /* we can safely reuse already computed coarse matrix */
3950         coarse_reuse = PETSC_TRUE;
3951       }
3952     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3953       coarse_reuse = PETSC_FALSE;
3954     }
3955     /* reset any subassembling information */
3956     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3957     ierr = ISDestroy(&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
3958   } else { /* primal space is unchanged, so we can reuse coarse matrix */
3959     coarse_reuse = PETSC_TRUE;
3960   }
3961 
3962   /* count "active" (i.e. with positive local size) and "void" processes */
3963   im_active = !!(pcis->n);
3964   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3965   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&all_procs);CHKERRQ(ierr);
3966   void_procs = all_procs-active_procs;
3967   csin_type_simple = PETSC_TRUE;
3968   redist = PETSC_FALSE;
3969   if (pcbddc->current_level && void_procs) {
3970     csin_ml = PETSC_TRUE;
3971     ncoarse_ml = void_procs;
3972     /* it has no sense to redistribute on a set of processors larger than the number of active processes */
3973     if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < active_procs) {
3974       csin_ds = PETSC_TRUE;
3975       ncoarse_ds = pcbddc->redistribute_coarse;
3976       redist = PETSC_TRUE;
3977     } else {
3978       csin_ds = PETSC_TRUE;
3979       ncoarse_ds = active_procs;
3980       redist = PETSC_TRUE;
3981     }
3982   } else {
3983     csin_ml = PETSC_FALSE;
3984     ncoarse_ml = all_procs;
3985     if (void_procs) {
3986       csin_ds = PETSC_TRUE;
3987       ncoarse_ds = void_procs;
3988       csin_type_simple = PETSC_FALSE;
3989     } else {
3990       if (pcbddc->redistribute_coarse > 0 && pcbddc->redistribute_coarse < all_procs) {
3991         csin_ds = PETSC_TRUE;
3992         ncoarse_ds = pcbddc->redistribute_coarse;
3993         redist = PETSC_TRUE;
3994       } else {
3995         csin_ds = PETSC_FALSE;
3996         ncoarse_ds = all_procs;
3997       }
3998     }
3999   }
4000 
4001   /*
4002     test if we can go multilevel: three conditions must be satisfied:
4003     - we have not exceeded the number of levels requested
4004     - we can actually subassemble the active processes
4005     - we can find a suitable number of MPI processes where we can place the subassembled problem
4006   */
4007   multilevel_allowed = PETSC_FALSE;
4008   multilevel_requested = PETSC_FALSE;
4009   if (pcbddc->current_level < pcbddc->max_levels) {
4010     multilevel_requested = PETSC_TRUE;
4011     if (active_procs/pcbddc->coarsening_ratio < 2 || ncoarse_ml/pcbddc->coarsening_ratio < 2) {
4012       multilevel_allowed = PETSC_FALSE;
4013     } else {
4014       multilevel_allowed = PETSC_TRUE;
4015     }
4016   }
4017   /* determine number of process partecipating to coarse solver */
4018   if (multilevel_allowed) {
4019     ncoarse = ncoarse_ml;
4020     csin = csin_ml;
4021     redist = PETSC_FALSE;
4022   } else {
4023     ncoarse = ncoarse_ds;
4024     csin = csin_ds;
4025   }
4026 
4027   /* creates temporary l2gmap and IS for coarse indexes */
4028   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
4029   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
4030 
4031   /* creates temporary MATIS object for coarse matrix */
4032   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr);
4033   ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4034   ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr);
4035   ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr);
4036 #if 0
4037   {
4038     PetscViewer viewer;
4039     char filename[256];
4040     sprintf(filename,"local_coarse_mat%d.m",PetscGlobalRank);
4041     ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr);
4042     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4043     ierr = MatView(coarse_submat_dense,viewer);CHKERRQ(ierr);
4044     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4045   }
4046 #endif
4047   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&t_coarse_mat_is);CHKERRQ(ierr);
4048   ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
4049   ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4050   ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4051   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
4052 
4053   /* compute dofs splitting and neumann boundaries for coarse dofs */
4054   if (multilevel_allowed && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal) ) { /* protects from unneded computations */
4055     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
4056     const PetscInt         *idxs;
4057     ISLocalToGlobalMapping tmap;
4058 
4059     /* create map between primal indices (in local representative ordering) and local primal numbering */
4060     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
4061     /* allocate space for temporary storage */
4062     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr);
4063     ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr);
4064     /* allocate for IS array */
4065     nisdofs = pcbddc->n_ISForDofsLocal;
4066     nisneu = !!pcbddc->NeumannBoundariesLocal;
4067     nis = nisdofs + nisneu;
4068     ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr);
4069     /* dofs splitting */
4070     for (i=0;i<nisdofs;i++) {
4071       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
4072       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
4073       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4074       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4075       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
4076       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4077       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr);
4078       /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */
4079     }
4080     /* neumann boundaries */
4081     if (pcbddc->NeumannBoundariesLocal) {
4082       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
4083       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
4084       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4085       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
4086       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
4087       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
4088       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr);
4089       /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */
4090     }
4091     /* free memory */
4092     ierr = PetscFree(tidxs);CHKERRQ(ierr);
4093     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
4094     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
4095   } else {
4096     nis = 0;
4097     nisdofs = 0;
4098     nisneu = 0;
4099     isarray = NULL;
4100   }
4101   /* destroy no longer needed map */
4102   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
4103 
4104   /* restrict on coarse candidates (if needed) */
4105   coarse_mat_is = NULL;
4106   if (csin) {
4107     if (!pcbddc->coarse_subassembling_init ) { /* creates subassembling init pattern if not present */
4108       if (redist) {
4109         PetscMPIInt rank;
4110         PetscInt    spc,n_spc_p1,dest[1],destsize;
4111 
4112         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4113         spc = active_procs/ncoarse;
4114         n_spc_p1 = active_procs%ncoarse;
4115         if (im_active) {
4116           destsize = 1;
4117           if (rank > n_spc_p1*(spc+1)-1) {
4118             dest[0] = n_spc_p1+(rank-(n_spc_p1*(spc+1)))/spc;
4119           } else {
4120             dest[0] = rank/(spc+1);
4121           }
4122         } else {
4123           destsize = 0;
4124         }
4125         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),destsize,dest,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4126       } else if (csin_type_simple) {
4127         PetscMPIInt rank;
4128         PetscInt    issize,isidx;
4129 
4130         ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr);
4131         if (im_active) {
4132           issize = 1;
4133           isidx = (PetscInt)rank;
4134         } else {
4135           issize = 0;
4136           isidx = -1;
4137         }
4138         ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),issize,&isidx,PETSC_COPY_VALUES,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4139       } else { /* get a suitable subassembling pattern from MATIS code */
4140         ierr = MatISGetSubassemblingPattern(t_coarse_mat_is,ncoarse,PETSC_TRUE,&pcbddc->coarse_subassembling_init);CHKERRQ(ierr);
4141       }
4142 
4143       /* we need to shift on coarse candidates either if we are not redistributing or we are redistributing and we have enough void processes */
4144       if (!redist || ncoarse <= void_procs) {
4145         PetscInt ncoarse_cand,tissize,*nisindices;
4146         PetscInt *coarse_candidates;
4147         const PetscInt* tisindices;
4148 
4149         /* get coarse candidates' ranks in pc communicator */
4150         ierr = PetscMalloc1(all_procs,&coarse_candidates);CHKERRQ(ierr);
4151         ierr = MPI_Allgather(&im_active,1,MPIU_INT,coarse_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4152         for (i=0,ncoarse_cand=0;i<all_procs;i++) {
4153           if (!coarse_candidates[i]) {
4154             coarse_candidates[ncoarse_cand++]=i;
4155           }
4156         }
4157         if (ncoarse_cand < ncoarse) SETERRQ2(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen! %d < %d",ncoarse_cand,ncoarse);
4158 
4159 
4160         if (pcbddc->dbg_flag) {
4161           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4162           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init (before shift)\n");CHKERRQ(ierr);
4163           ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4164           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse candidates\n");CHKERRQ(ierr);
4165           for (i=0;i<ncoarse_cand;i++) {
4166             ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"%d ",coarse_candidates[i]);CHKERRQ(ierr);
4167           }
4168           ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"\n");CHKERRQ(ierr);
4169           ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4170         }
4171         /* shift the pattern on coarse candidates */
4172         ierr = ISGetLocalSize(pcbddc->coarse_subassembling_init,&tissize);CHKERRQ(ierr);
4173         ierr = ISGetIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4174         ierr = PetscMalloc1(tissize,&nisindices);CHKERRQ(ierr);
4175         for (i=0;i<tissize;i++) nisindices[i] = coarse_candidates[tisindices[i]];
4176         ierr = ISRestoreIndices(pcbddc->coarse_subassembling_init,&tisindices);CHKERRQ(ierr);
4177         ierr = ISGeneralSetIndices(pcbddc->coarse_subassembling_init,tissize,nisindices,PETSC_OWN_POINTER);CHKERRQ(ierr);
4178         ierr = PetscFree(coarse_candidates);CHKERRQ(ierr);
4179       }
4180       if (pcbddc->dbg_flag) {
4181         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4182         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init\n");CHKERRQ(ierr);
4183         ierr = ISView(pcbddc->coarse_subassembling_init,pcbddc->dbg_viewer);CHKERRQ(ierr);
4184         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4185       }
4186     }
4187     /* get temporary coarse mat in IS format restricted on coarse procs (plus additional index sets of isarray) */
4188     ierr = MatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling_init,0,PETSC_TRUE,MAT_INITIAL_MATRIX,&coarse_mat_is,nis,isarray);CHKERRQ(ierr);
4189   } else {
4190     if (pcbddc->dbg_flag) {
4191       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4192       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern init not needed\n");CHKERRQ(ierr);
4193       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4194     }
4195     ierr = PetscObjectReference((PetscObject)t_coarse_mat_is);CHKERRQ(ierr);
4196     coarse_mat_is = t_coarse_mat_is;
4197   }
4198 
4199   /* create local to global scatters for coarse problem */
4200   if (compute_vecs) {
4201     PetscInt lrows;
4202     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
4203     if (coarse_mat_is) {
4204       ierr = MatGetLocalSize(coarse_mat_is,&lrows,NULL);CHKERRQ(ierr);
4205     } else {
4206       lrows = 0;
4207     }
4208     ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr);
4209     ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr);
4210     ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr);
4211     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4212     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
4213   }
4214   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
4215   ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr);
4216 
4217   /* set defaults for coarse KSP and PC */
4218   if (multilevel_allowed) {
4219     coarse_ksp_type = KSPRICHARDSON;
4220     coarse_pc_type = PCBDDC;
4221   } else {
4222     coarse_ksp_type = KSPPREONLY;
4223     coarse_pc_type = PCREDUNDANT;
4224   }
4225 
4226   /* print some info if requested */
4227   if (pcbddc->dbg_flag) {
4228     if (!multilevel_allowed) {
4229       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4230       if (multilevel_requested) {
4231         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4232       } else if (pcbddc->max_levels) {
4233         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
4234       }
4235       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4236     }
4237   }
4238 
4239   /* create the coarse KSP object only once with defaults */
4240   if (coarse_mat_is) {
4241     MatReuse coarse_mat_reuse;
4242     PetscViewer dbg_viewer = NULL;
4243     if (pcbddc->dbg_flag) {
4244       dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat_is));
4245       ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4246     }
4247     if (!pcbddc->coarse_ksp) {
4248       char prefix[256],str_level[16];
4249       size_t len;
4250       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat_is),&pcbddc->coarse_ksp);CHKERRQ(ierr);
4251       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
4252       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
4253       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat_is,coarse_mat_is);CHKERRQ(ierr);
4254       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
4255       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
4256       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4257       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4258       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
4259       /* prefix */
4260       ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
4261       ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
4262       if (!pcbddc->current_level) {
4263         ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
4264         ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
4265       } else {
4266         ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
4267         if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
4268         if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
4269         ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr);
4270         sprintf(str_level,"l%d_",(int)(pcbddc->current_level));
4271         ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
4272       }
4273       ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
4274       /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4275       ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
4276       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
4277       ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
4278       /* allow user customization */
4279       ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
4280     }
4281     /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
4282     if (nisdofs) {
4283       ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr);
4284       for (i=0;i<nisdofs;i++) {
4285         ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr);
4286       }
4287     }
4288     if (nisneu) {
4289       ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr);
4290       ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr);
4291     }
4292 
4293     /* get some info after set from options */
4294     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
4295     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
4296     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
4297     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr);
4298     if (isbddc && !multilevel_allowed) { /* multilevel can only be requested via pc_bddc_set_levels */
4299       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
4300       isbddc = PETSC_FALSE;
4301     }
4302     if (isredundant) {
4303       KSP inner_ksp;
4304       PC inner_pc;
4305       ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr);
4306       ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr);
4307       ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr);
4308     }
4309 
4310     /* assemble coarse matrix */
4311     if (coarse_reuse) {
4312       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr);
4313       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
4314       coarse_mat_reuse = MAT_REUSE_MATRIX;
4315     } else {
4316       coarse_mat_reuse = MAT_INITIAL_MATRIX;
4317     }
4318     if (isbddc || isnn) {
4319       if (pcbddc->coarsening_ratio > 1) {
4320         if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
4321           ierr = MatISGetSubassemblingPattern(coarse_mat_is,active_procs/pcbddc->coarsening_ratio,PETSC_TRUE,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
4322           if (pcbddc->dbg_flag) {
4323             ierr = PetscViewerASCIIPrintf(dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4324             ierr = PetscViewerASCIIPrintf(dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
4325             ierr = ISView(pcbddc->coarse_subassembling,dbg_viewer);CHKERRQ(ierr);
4326             ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4327           }
4328         }
4329         ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,0,PETSC_FALSE,coarse_mat_reuse,&coarse_mat,0,NULL);CHKERRQ(ierr);
4330       } else {
4331         ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr);
4332         coarse_mat = coarse_mat_is;
4333       }
4334     } else {
4335       ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr);
4336     }
4337     ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
4338 
4339     /* propagate symmetry info to coarse matrix */
4340     ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pcbddc->issym);CHKERRQ(ierr);
4341     ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
4342 
4343     /* set operators */
4344     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4345     if (pcbddc->dbg_flag) {
4346       ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr);
4347     }
4348   } else { /* processes non partecipating to coarse solver (if any) */
4349     coarse_mat = 0;
4350   }
4351   ierr = PetscFree(isarray);CHKERRQ(ierr);
4352 #if 0
4353   {
4354     PetscViewer viewer;
4355     char filename[256];
4356     sprintf(filename,"coarse_mat.m");
4357     ierr = PetscViewerASCIIOpen(PETSC_COMM_WORLD,filename,&viewer);CHKERRQ(ierr);
4358     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
4359     ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr);
4360     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
4361   }
4362 #endif
4363 
4364   /* Compute coarse null space (special handling by BDDC only) */
4365   if (pcbddc->NullSpace) {
4366     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
4367   }
4368 
4369   if (pcbddc->coarse_ksp) {
4370     Vec crhs,csol;
4371     PetscBool ispreonly;
4372     if (CoarseNullSpace) {
4373       if (isbddc) {
4374         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
4375       } else {
4376         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
4377       }
4378     }
4379     /* setup coarse ksp */
4380     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
4381     ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr);
4382     ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr);
4383     /* hack */
4384     if (!csol) {
4385       ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr);
4386     }
4387     if (!crhs) {
4388       ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr);
4389     }
4390     /* Check coarse problem if in debug mode or if solving with an iterative method */
4391     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
4392     if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) {
4393       KSP       check_ksp;
4394       KSPType   check_ksp_type;
4395       PC        check_pc;
4396       Vec       check_vec,coarse_vec;
4397       PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
4398       PetscInt  its;
4399       PetscBool compute_eigs;
4400       PetscReal *eigs_r,*eigs_c;
4401       PetscInt  neigs;
4402       const char *prefix;
4403 
4404       /* Create ksp object suitable for estimation of extreme eigenvalues */
4405       ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr);
4406       ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr);
4407       ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
4408       if (ispreonly) {
4409         check_ksp_type = KSPPREONLY;
4410         compute_eigs = PETSC_FALSE;
4411       } else {
4412         check_ksp_type = KSPGMRES;
4413         compute_eigs = PETSC_TRUE;
4414       }
4415       ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
4416       ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr);
4417       ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr);
4418       ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr);
4419       ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr);
4420       ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr);
4421       ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr);
4422       ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr);
4423       ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
4424       ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
4425       ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
4426       /* create random vec */
4427       ierr = KSPGetSolution(pcbddc->coarse_ksp,&coarse_vec);CHKERRQ(ierr);
4428       ierr = VecDuplicate(coarse_vec,&check_vec);CHKERRQ(ierr);
4429       ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
4430       if (CoarseNullSpace) {
4431         ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
4432       }
4433       ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4434       /* solve coarse problem */
4435       ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr);
4436       if (CoarseNullSpace) {
4437         ierr = MatNullSpaceRemove(CoarseNullSpace,coarse_vec);CHKERRQ(ierr);
4438       }
4439       /* set eigenvalue estimation if preonly has not been requested */
4440       if (compute_eigs) {
4441         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr);
4442         ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr);
4443         ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr);
4444         lambda_max = eigs_r[neigs-1];
4445         lambda_min = eigs_r[0];
4446         if (pcbddc->use_coarse_estimates) {
4447           if (lambda_max>lambda_min) {
4448             ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
4449             ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
4450           }
4451         }
4452       }
4453 
4454       /* check coarse problem residual error */
4455       if (pcbddc->dbg_flag) {
4456         PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
4457         ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4458         ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr);
4459         ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
4460         ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr);
4461         ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
4462         ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
4463         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr);
4464         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr);
4465         ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr);
4466         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
4467         ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
4468         if (compute_eigs) {
4469           PetscReal lambda_max_s,lambda_min_s;
4470           ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr);
4471           ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
4472           ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr);
4473           ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr);
4474           for (i=0;i<neigs;i++) {
4475             ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr);
4476           }
4477         }
4478         ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr);
4479         ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr);
4480       }
4481       ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
4482       if (compute_eigs) {
4483         ierr = PetscFree(eigs_r);CHKERRQ(ierr);
4484         ierr = PetscFree(eigs_c);CHKERRQ(ierr);
4485       }
4486     }
4487   }
4488   /* print additional info */
4489   if (pcbddc->dbg_flag) {
4490     /* waits until all processes reaches this point */
4491     ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr);
4492     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
4493     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4494   }
4495 
4496   /* free memory */
4497   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
4498   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
4499   PetscFunctionReturn(0);
4500 }
4501 
4502 #undef __FUNCT__
4503 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
4504 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
4505 {
4506   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
4507   PC_IS*         pcis = (PC_IS*)pc->data;
4508   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
4509   PetscInt       i,coarse_size=0;
4510   PetscInt       *local_primal_indices=NULL;
4511   PetscErrorCode ierr;
4512 
4513   PetscFunctionBegin;
4514   /* Compute global number of coarse dofs */
4515   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
4516     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
4517   }
4518   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
4519 
4520   /* check numbering */
4521   if (pcbddc->dbg_flag) {
4522     PetscScalar coarsesum,*array;
4523     PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
4524 
4525     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4526     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
4527     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
4528     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
4529     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
4530     for (i=0;i<pcbddc->local_primal_size;i++) {
4531       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
4532     }
4533     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
4534     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
4535     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4536     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4537     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4538     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4539     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4540     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4541     for (i=0;i<pcis->n;i++) {
4542       if (array[i] == 1.0) {
4543         set_error = PETSC_TRUE;
4544         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
4545       }
4546     }
4547     ierr = MPI_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
4548     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4549     for (i=0;i<pcis->n;i++) {
4550       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
4551     }
4552     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
4553     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
4554     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4555     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
4556     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
4557     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
4558     if (pcbddc->dbg_flag > 1 || set_error_reduced) {
4559       PetscInt *gidxs;
4560 
4561       ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr);
4562       ierr = ISLocalToGlobalMappingApply(matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr);
4563       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
4564       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4565       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
4566       for (i=0;i<pcbddc->local_primal_size;i++) {
4567         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr);
4568       }
4569       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4570       ierr = PetscFree(gidxs);CHKERRQ(ierr);
4571     }
4572     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
4573     if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
4574   }
4575   /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */
4576   /* get back data */
4577   *coarse_size_n = coarse_size;
4578   *local_primal_indices_n = local_primal_indices;
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "PCBDDCGlobalToLocal"
4584 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
4585 {
4586   IS             localis_t;
4587   PetscInt       i,lsize,*idxs,n;
4588   PetscScalar    *vals;
4589   PetscErrorCode ierr;
4590 
4591   PetscFunctionBegin;
4592   /* get indices in local ordering exploiting local to global map */
4593   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
4594   ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr);
4595   for (i=0;i<lsize;i++) vals[i] = 1.0;
4596   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4597   ierr = VecSet(gwork,0.0);CHKERRQ(ierr);
4598   ierr = VecSet(lwork,0.0);CHKERRQ(ierr);
4599   if (idxs) { /* multilevel guard */
4600     ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
4601   }
4602   ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr);
4603   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
4604   ierr = PetscFree(vals);CHKERRQ(ierr);
4605   ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr);
4606   /* now compute set in local ordering */
4607   ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4608   ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
4609   ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4610   ierr = VecGetSize(lwork,&n);CHKERRQ(ierr);
4611   for (i=0,lsize=0;i<n;i++) {
4612     if (PetscRealPart(vals[i]) > 0.5) {
4613       lsize++;
4614     }
4615   }
4616   ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr);
4617   for (i=0,lsize=0;i<n;i++) {
4618     if (PetscRealPart(vals[i]) > 0.5) {
4619       idxs[lsize++] = i;
4620     }
4621   }
4622   ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr);
4623   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
4624   *localis = localis_t;
4625   PetscFunctionReturn(0);
4626 }
4627 
4628 /* the next two functions will be called in KSPMatMult if a change of basis has been requested */
4629 #undef __FUNCT__
4630 #define __FUNCT__ "PCBDDCMatMult_Private"
4631 static PetscErrorCode PCBDDCMatMult_Private(Mat A, Vec x, Vec y)
4632 {
4633   PCBDDCChange_ctx change_ctx;
4634   PetscErrorCode   ierr;
4635 
4636   PetscFunctionBegin;
4637   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4638   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4639   ierr = MatMult(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4640   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4641   PetscFunctionReturn(0);
4642 }
4643 
4644 #undef __FUNCT__
4645 #define __FUNCT__ "PCBDDCMatMultTranspose_Private"
4646 static PetscErrorCode PCBDDCMatMultTranspose_Private(Mat A, Vec x, Vec y)
4647 {
4648   PCBDDCChange_ctx change_ctx;
4649   PetscErrorCode   ierr;
4650 
4651   PetscFunctionBegin;
4652   ierr = MatShellGetContext(A,&change_ctx);CHKERRQ(ierr);
4653   ierr = MatMult(change_ctx->global_change,x,change_ctx->work[0]);CHKERRQ(ierr);
4654   ierr = MatMultTranspose(change_ctx->original_mat,change_ctx->work[0],change_ctx->work[1]);CHKERRQ(ierr);
4655   ierr = MatMultTranspose(change_ctx->global_change,change_ctx->work[1],y);CHKERRQ(ierr);
4656   PetscFunctionReturn(0);
4657 }
4658 
4659 #undef __FUNCT__
4660 #define __FUNCT__ "PCBDDCSetUpSubSchurs"
4661 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
4662 {
4663   PC_IS               *pcis=(PC_IS*)pc->data;
4664   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4665   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4666   Mat                 S_j;
4667   PetscInt            *used_xadj,*used_adjncy;
4668   PetscBool           free_used_adj;
4669   PetscErrorCode      ierr;
4670 
4671   PetscFunctionBegin;
4672   /* decide the adjacency to be used for determining internal problems for local schur on subsets */
4673   free_used_adj = PETSC_FALSE;
4674   if (pcbddc->sub_schurs_layers == -1) {
4675     used_xadj = NULL;
4676     used_adjncy = NULL;
4677   } else {
4678     if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
4679       used_xadj = pcbddc->mat_graph->xadj;
4680       used_adjncy = pcbddc->mat_graph->adjncy;
4681     } else if (pcbddc->computed_rowadj) {
4682       used_xadj = pcbddc->mat_graph->xadj;
4683       used_adjncy = pcbddc->mat_graph->adjncy;
4684     } else {
4685       PetscBool      flg_row=PETSC_FALSE;
4686       const PetscInt *xadj,*adjncy;
4687       PetscInt       nvtxs;
4688 
4689       ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4690       if (flg_row) {
4691         ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr);
4692         ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr);
4693         ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr);
4694         free_used_adj = PETSC_TRUE;
4695       } else {
4696         pcbddc->sub_schurs_layers = -1;
4697         used_xadj = NULL;
4698         used_adjncy = NULL;
4699       }
4700       ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
4701     }
4702   }
4703 
4704   /* setup sub_schurs data */
4705   ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr);
4706   if (!sub_schurs->use_mumps) {
4707     /* pcbddc->ksp_D up to date only if not using MUMPS */
4708     ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr);
4709     ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4710   } else {
4711     ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,pcbddc->faster_deluxe,pcbddc->adaptive_selection,pcbddc->use_edges,pcbddc->use_faces);CHKERRQ(ierr);
4712   }
4713   ierr = MatDestroy(&S_j);CHKERRQ(ierr);
4714 
4715   /* free adjacency */
4716   if (free_used_adj) {
4717     ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr);
4718   }
4719   PetscFunctionReturn(0);
4720 }
4721 
4722 #undef __FUNCT__
4723 #define __FUNCT__ "PCBDDCInitSubSchurs"
4724 PetscErrorCode PCBDDCInitSubSchurs(PC pc)
4725 {
4726   PC_IS               *pcis=(PC_IS*)pc->data;
4727   PC_BDDC             *pcbddc=(PC_BDDC*)pc->data;
4728   PCBDDCSubSchurs     sub_schurs=pcbddc->sub_schurs;
4729   PCBDDCGraph         graph;
4730   PetscErrorCode      ierr;
4731 
4732   PetscFunctionBegin;
4733   /* attach interface graph for determining subsets */
4734   if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
4735     IS verticesIS;
4736 
4737     ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr);
4738     ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr);
4739     ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global);CHKERRQ(ierr);
4740     ierr = PCBDDCGraphSetUp(graph,0,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticesIS);CHKERRQ(ierr);
4741     ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr);
4742     ierr = ISDestroy(&verticesIS);CHKERRQ(ierr);
4743 /*
4744     if (pcbddc->dbg_flag) {
4745       ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr);
4746     }
4747 */
4748   } else {
4749     graph = pcbddc->mat_graph;
4750   }
4751 
4752   /* sub_schurs init */
4753   ierr = PCBDDCSubSchursInit(sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap);CHKERRQ(ierr);
4754 
4755   /* free graph struct */
4756   if (pcbddc->sub_schurs_rebuild) {
4757     ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr);
4758   }
4759   PetscFunctionReturn(0);
4760 }
4761