xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision b8ffe3172e0eee595e5ba1cd1948f5fa9a5e21e6)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
263     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
264     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
265     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
266     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
267   }
268 
269   /* Matrix of coarse basis functions (local) */
270   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
271   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
272   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
273   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
274   if (pcbddc->switch_static || pcbddc->dbg_flag) {
275     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
276     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
277     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
279   }
280 
281   if (pcbddc->dbg_flag) {
282     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
283     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
284     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
285   }
286   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
287   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
288 
289   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
290 
291   /* vertices */
292   for (i=0;i<n_vertices;i++) {
293     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
294     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
295     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
296     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
297     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
298     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
299     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
300     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
301     if (n_constraints) {
302       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
303       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
304       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
305     }
306     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
307     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
308 
309     /* Set values in coarse basis function and subdomain part of coarse_mat */
310     /* coarse basis functions */
311     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
312     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
313     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
314     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
315     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
317     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
318     if (pcbddc->switch_static || pcbddc->dbg_flag) {
319       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
320       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
322       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
323       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
324     }
325     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
326     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
327     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
329     if (n_constraints) {
330       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
331       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
332       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
333     }
334 
335     /* check */
336     if (pcbddc->dbg_flag) {
337       /* assemble subdomain vector on local nodes */
338       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
340       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
342       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
343       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
344       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
345       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
346       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350       if (n_constraints) {
351         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
353         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       }
355       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
357       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
358       /* check saddle point solution */
359       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
360       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
361       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
362       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
363       /* shift by the identity matrix */
364       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
365       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
366       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
367       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
368     }
369   }
370 
371   /* constraints */
372   for (i=0;i<n_constraints;i++) {
373     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
374     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
375     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
376     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
377     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
378     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
379     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
381     if (n_vertices) {
382       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
383     }
384     /* Set values in coarse basis function and subdomain part of coarse_mat */
385     /* coarse basis functions */
386     j = i+n_vertices; /* don't touch this! */
387     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
388     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
389     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
390     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
391     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
392     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
393     if (pcbddc->switch_static || pcbddc->dbg_flag) {
394       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
397       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
398       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
399     }
400     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
401     if (n_vertices) {
402       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
403       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
405     }
406     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
407     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
408     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
409 
410     if (pcbddc->dbg_flag) {
411       /* assemble subdomain vector on nodes */
412       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
413       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
414       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
416       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
417       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
418       /* assemble subdomain vector of lagrange multipliers */
419       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
420       if (n_vertices) {
421         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
422         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
423         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       }
425       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
430       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
431       /* check saddle point solution */
432       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
433       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
434       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
435       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
436       /* shift by the identity matrix */
437       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
441     }
442   }
443   /* call assembling routines for local coarse basis */
444   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
445   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
446   if (pcbddc->switch_static || pcbddc->dbg_flag) {
447     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
448     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
449   }
450 
451   /* compute other basis functions for non-symmetric problems */
452   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
453   if (!setsym || (setsym && !issym)) {
454     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
455     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
457     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
458     if (pcbddc->switch_static || pcbddc->dbg_flag) {
459       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
460       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
461       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
462       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
463     }
464     for (i=0;i<pcbddc->local_primal_size;i++) {
465       if (n_constraints) {
466         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
467         for (j=0;j<n_constraints;j++) {
468           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
469         }
470         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
471         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
472       }
473       if (i<n_vertices) {
474         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
475         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
477         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
478         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
479         if (n_constraints) {
480           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
481         }
482       } else {
483         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
484       }
485       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
486       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
487       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
490       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
491       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492       if (i<n_vertices) {
493         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
494       }
495       if (pcbddc->switch_static || pcbddc->dbg_flag) {
496         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
498         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
499         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
500         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
501       }
502 
503       if (pcbddc->dbg_flag) {
504         /* assemble subdomain vector on nodes */
505         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
509         if (i<n_vertices) {
510           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
511         }
512         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
513         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
514         /* assemble subdomain vector of lagrange multipliers */
515         for (j=0;j<pcbddc->local_primal_size;j++) {
516           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
517         }
518         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520         /* check saddle point solution */
521         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
522         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
523         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
524         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
525         /* shift by the identity matrix */
526         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
527         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
528         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
529         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
530       }
531     }
532     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534     if (pcbddc->switch_static || pcbddc->dbg_flag) {
535       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537     }
538   }
539   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
540   /* Checking coarse_sub_mat and coarse basis functios */
541   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
542   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
543   if (pcbddc->dbg_flag) {
544     Mat         coarse_sub_mat;
545     Mat         AUXMAT,TM1,TM2,TM3,TM4;
546     Mat         coarse_phi_D,coarse_phi_B;
547     Mat         coarse_psi_D,coarse_psi_B;
548     Mat         A_II,A_BB,A_IB,A_BI;
549     MatType     checkmattype=MATSEQAIJ;
550     PetscReal   real_value;
551 
552     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
553     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
554     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
555     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
556     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
557     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
558     if (pcbddc->coarse_psi_B) {
559       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
560       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
561     }
562     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
563 
564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
565     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
567     if (pcbddc->coarse_psi_B) {
568       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
569       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
570       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
571       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
572       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
573       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
574       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
575       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
576       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
577       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
578       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
579       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
580     } else {
581       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
583       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589     }
590     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
591     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
592     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
593     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
594     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
595     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
596     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
597     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
598     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
599     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
600     for (i=0;i<pcbddc->local_primal_size;i++) {
601       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
602     }
603     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
604     for (i=0;i<pcbddc->local_primal_size;i++) {
605       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
606     }
607     if (pcbddc->coarse_psi_B) {
608       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
609       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
610         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
611       }
612       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
613       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
614         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
615       }
616     }
617     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
618     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
619     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
620     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
621     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
622     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
623     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
624     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
625     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
626     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
627     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
628     if (pcbddc->coarse_psi_B) {
629       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
630       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
631     }
632     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
633     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
634     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
635     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
636   }
637   /* free memory */
638   if (n_vertices) {
639     ierr = PetscFree(vertices);CHKERRQ(ierr);
640     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
641     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
645   }
646   if (n_constraints) {
647     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
651   }
652   ierr = PetscFree(auxindices);CHKERRQ(ierr);
653   /* get back data */
654   *coarse_submat_vals_n = coarse_submat_vals;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
661 {
662   PC_IS*            pcis = (PC_IS*)(pc->data);
663   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
664   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
665   /* manage repeated solves */
666   MatReuse          reuse;
667   MatStructure      matstruct;
668   PetscErrorCode    ierr;
669 
670   PetscFunctionBegin;
671   /* get mat flags */
672   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
673   reuse = MAT_INITIAL_MATRIX;
674   if (pc->setupcalled) {
675     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
676     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
677     if (matstruct == SAME_NONZERO_PATTERN) {
678       reuse = MAT_REUSE_MATRIX;
679     } else {
680       reuse = MAT_INITIAL_MATRIX;
681     }
682   }
683   if (reuse == MAT_INITIAL_MATRIX) {
684     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
685     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
686     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
687     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
688     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
689   }
690 
691   /* transform local matrices if needed */
692   if (pcbddc->use_change_of_basis) {
693     Mat         change_mat_all;
694     PetscScalar *row_cmat_values;
695     PetscInt    *row_cmat_indices;
696     PetscInt    *nnz,*is_indices,*temp_indices;
697     PetscInt    i,j,k,n_D,n_B;
698 
699     /* Get Non-overlapping dimensions */
700     n_B = pcis->n_B;
701     n_D = pcis->n-n_B;
702 
703     /* compute nonzero structure of change of basis on all local nodes */
704     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
705     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
706     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
707     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
708     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
709     k=1;
710     for (i=0;i<n_B;i++) {
711       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
712       nnz[is_indices[i]]=j;
713       if (k < j) k = j;
714       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
715     }
716     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     /* assemble change of basis matrix on the whole set of local dofs */
718     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
719     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
720     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
721     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
722     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) {
725       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
726     }
727     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     for (i=0;i<n_B;i++) {
730       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
731       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
732       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
733       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
734     }
735     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
738     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
739     if (i==1) {
740       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
741     } else {
742       Mat work_mat;
743       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
744       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
745       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
746     }
747     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
748     ierr = PetscFree(nnz);CHKERRQ(ierr);
749     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
750   } else {
751     /* without change of basis, the local matrix is unchanged */
752     if (!pcbddc->local_mat) {
753       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
754       pcbddc->local_mat = matis->A;
755     }
756   }
757 
758   /* get submatrices */
759   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
760   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
761   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
762   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
769 {
770   PC_IS*         pcis = (PC_IS*)(pc->data);
771   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
772   IS             is_aux1,is_aux2;
773   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
774   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
775   PetscBT        bitmask;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* Set Non-overlapping dimensions */
780   n_B = pcis->n_B; n_D = pcis->n - n_B;
781   /* get vertex indices from constraint matrix */
782   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
783   /* Set number of constraints */
784   n_constraints = pcbddc->local_primal_size-n_vertices;
785   /* create auxiliary bitmask */
786   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
787   for (i=0;i<n_vertices;i++) {
788     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
789   }
790   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
791   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
792   for (i=0, n_R=0; i<pcis->n; i++) {
793     if (!PetscBTLookup(bitmask,i)) {
794       idx_R_local[n_R] = i;
795       n_R++;
796     }
797   }
798   ierr = PetscFree(vertices);CHKERRQ(ierr);
799   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
800 
801   /* print some info if requested */
802   if (pcbddc->dbg_flag) {
803     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
806     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
807     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
808     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
809     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
810   }
811 
812   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
813   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
814   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
815   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816   for (i=0; i<n_D; i++) {
817     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
818   }
819   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820   for (i=0, j=0; i<n_R; i++) {
821     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
822       aux_array1[j++] = i;
823     }
824   }
825   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
826   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
827   for (i=0, j=0; i<n_B; i++) {
828     if (!PetscBTLookup(bitmask,is_indices[i])) {
829       aux_array2[j++] = i;
830     }
831   }
832   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
834   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
835   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
836   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
837 
838   if (pcbddc->switch_static || pcbddc->dbg_flag) {
839     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
840     for (i=0, j=0; i<n_R; i++) {
841       if (PetscBTLookup(bitmask,idx_R_local[i])) {
842         aux_array1[j++] = i;
843       }
844     }
845     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
846     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
847     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   }
849   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
856 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
857 {
858   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
859   PC_IS          *pcis = (PC_IS*)pc->data;
860   PC             pc_temp;
861   Mat            A_RR;
862   Vec            vec1,vec2,vec3;
863   MatStructure   matstruct;
864   PetscScalar    m_one = -1.0;
865   PetscReal      value;
866   PetscInt       n_D,n_R;
867   PetscBool      use_exact,use_exact_reduced;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /* Creating PC contexts for local Dirichlet and Neumann problems */
872   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
873 
874   /* DIRICHLET PROBLEM */
875   /* Matrix for Dirichlet problem is pcis->A_II */
876   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
877   if (!pcbddc->ksp_D) { /* create object if not yet build */
878     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
879     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
880     /* default */
881     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
882     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
883     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
884     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
885     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
886   }
887   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
888   /* Allow user's customization */
889   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
890   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
891   if (!n_D) {
892     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
893     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
894   }
895   /* Set Up KSP for Dirichlet problem of BDDC */
896   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
897   /* set ksp_D into pcis data */
898   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
899   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
900   pcis->ksp_D = pcbddc->ksp_D;
901 
902   /* NEUMANN PROBLEM */
903   /* Matrix for Neumann problem is A_RR -> we need to create it */
904   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
905   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
906   if (!pcbddc->ksp_R) { /* create object if not yet build */
907     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
908     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
909     /* default */
910     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
911     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
912     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
913     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
914     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
915   }
916   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
917   /* Allow user's customization */
918   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
919   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
920   if (!n_R) {
921     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
922     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
923   }
924   /* Set Up KSP for Neumann problem of BDDC */
925   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
926 
927   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
928 
929   /* Dirichlet */
930   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
931   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
932   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
933   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
934   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
935   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
936   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
937   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
938   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
939   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
940   /* need to be adapted? */
941   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
942   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
943   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
944   /* print info */
945   if (pcbddc->dbg_flag) {
946     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
947     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
948     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
949     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
950     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
951   }
952   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
953     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
954   }
955 
956   /* Neumann */
957   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
958   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
959   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
960   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
961   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
962   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
963   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
964   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
965   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
966   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
967   /* need to be adapted? */
968   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
969   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
970   /* print info */
971   if (pcbddc->dbg_flag) {
972     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
973     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
974   }
975   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
976     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
977   }
978 
979   /* free Neumann problem's matrix */
980   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
986 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
987 {
988   PetscErrorCode ierr;
989   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
990 
991   PetscFunctionBegin;
992   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
993   if (pcbddc->local_auxmat1) {
994     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
995     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
996   }
997   PetscFunctionReturn(0);
998 }
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1002 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1003 {
1004   PetscErrorCode ierr;
1005   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1006   PC_IS*            pcis = (PC_IS*)  (pc->data);
1007   const PetscScalar zero = 0.0;
1008 
1009   PetscFunctionBegin;
1010   /* Application of PHI^T (or PSI^T)  */
1011   if (pcbddc->coarse_psi_B) {
1012     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1013     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1014   } else {
1015     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1016     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1017   }
1018   /* Scatter data of coarse_rhs */
1019   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1020   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021 
1022   /* Local solution on R nodes */
1023   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1024   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1025   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1026   if (pcbddc->switch_static) {
1027     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1028     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1029   }
1030   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1031   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1032   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034   if (pcbddc->switch_static) {
1035     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037   }
1038 
1039   /* Coarse solution */
1040   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1042     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1043   }
1044   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046 
1047   /* Sum contributions from two levels */
1048   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1049   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1055 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1056 {
1057   PetscErrorCode ierr;
1058   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1059 
1060   PetscFunctionBegin;
1061   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1067 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1068 {
1069   PetscErrorCode ierr;
1070   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1071 
1072   PetscFunctionBegin;
1073   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 /* uncomment for testing purposes */
1078 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1079 #undef __FUNCT__
1080 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1081 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1082 {
1083   PetscErrorCode    ierr;
1084   PC_IS*            pcis = (PC_IS*)(pc->data);
1085   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1086   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1087   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1088   MatType           impMatType=MATSEQAIJ;
1089   /* one and zero */
1090   PetscScalar       one=1.0,zero=0.0;
1091   /* space to store constraints and their local indices */
1092   PetscScalar       *temp_quadrature_constraint;
1093   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1094   /* iterators */
1095   PetscInt          i,j,k,total_counts,temp_start_ptr;
1096   /* stuff to store connected components stored in pcbddc->mat_graph */
1097   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1098   PetscInt          n_ISForFaces,n_ISForEdges;
1099   /* near null space stuff */
1100   MatNullSpace      nearnullsp;
1101   const Vec         *nearnullvecs;
1102   Vec               *localnearnullsp;
1103   PetscBool         nnsp_has_cnst;
1104   PetscInt          nnsp_size;
1105   PetscScalar       *array;
1106   /* BLAS integers */
1107   PetscBLASInt      lwork,lierr;
1108   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1109   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1110   /* LAPACK working arrays for SVD or POD */
1111   PetscBool         skip_lapack;
1112   PetscScalar       *work;
1113   PetscReal         *singular_vals;
1114 #if defined(PETSC_USE_COMPLEX)
1115   PetscReal         *rwork;
1116 #endif
1117 #if defined(PETSC_MISSING_LAPACK_GESVD)
1118   PetscBLASInt      Blas_one_2=1;
1119   PetscScalar       *temp_basis,*correlation_mat;
1120 #else
1121   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1122   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1123 #endif
1124   /* change of basis */
1125   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1126   PetscBool         boolforchange;
1127   PetscBT           touched,change_basis;
1128   /* auxiliary stuff */
1129   PetscInt          *nnz,*is_indices,*local_to_B;
1130   /* some quantities */
1131   PetscInt          n_vertices,total_primal_vertices;
1132   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1133 
1134 
1135   PetscFunctionBegin;
1136   /* Get index sets for faces, edges and vertices from graph */
1137   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1138     pcbddc->use_vertices = PETSC_TRUE;
1139   }
1140   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1141   /* HACK: provide functions to set change of basis */
1142   if (!ISForVertices && pcbddc->NullSpace) {
1143     pcbddc->use_change_of_basis = PETSC_TRUE;
1144     pcbddc->use_change_on_faces = PETSC_FALSE;
1145   }
1146   /* print some info */
1147   if (pcbddc->dbg_flag) {
1148     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1149     i = 0;
1150     if (ISForVertices) {
1151       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1152     }
1153     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1154     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1155     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1156     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1157   }
1158   /* check if near null space is attached to global mat */
1159   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1160   if (nearnullsp) {
1161     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1162   } else { /* if near null space is not provided BDDC uses constants by default */
1163     nnsp_size = 0;
1164     nnsp_has_cnst = PETSC_TRUE;
1165   }
1166   /* get max number of constraints on a single cc */
1167   max_constraints = nnsp_size;
1168   if (nnsp_has_cnst) max_constraints++;
1169 
1170   /*
1171        Evaluate maximum storage size needed by the procedure
1172        - temp_indices will contain start index of each constraint stored as follows
1173        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1174        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1175        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1176                                                                                                                                                          */
1177   total_counts = n_ISForFaces+n_ISForEdges;
1178   total_counts *= max_constraints;
1179   n_vertices = 0;
1180   if (ISForVertices) {
1181     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1182   }
1183   total_counts += n_vertices;
1184   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1185   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1186   total_counts = 0;
1187   max_size_of_constraint = 0;
1188   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1189     if (i<n_ISForEdges) {
1190       used_IS = &ISForEdges[i];
1191     } else {
1192       used_IS = &ISForFaces[i-n_ISForEdges];
1193     }
1194     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1195     total_counts += j;
1196     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1197   }
1198   total_counts *= max_constraints;
1199   total_counts += n_vertices;
1200   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1201   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1202   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1203   /* local to boundary numbering */
1204   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1205   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1206   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1207   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1208   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1209   /* get local part of global near null space vectors */
1210   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1211   for (k=0;k<nnsp_size;k++) {
1212     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1213     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215   }
1216 
1217   /* whether or not to skip lapack calls */
1218   skip_lapack = PETSC_TRUE;
1219   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1220 
1221   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1222   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1223     PetscScalar temp_work;
1224 #if defined(PETSC_MISSING_LAPACK_GESVD)
1225     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1226     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1227     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1228     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1229 #if defined(PETSC_USE_COMPLEX)
1230     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1231 #endif
1232     /* now we evaluate the optimal workspace using query with lwork=-1 */
1233     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1234     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1235     lwork = -1;
1236     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1237 #if !defined(PETSC_USE_COMPLEX)
1238     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1239 #else
1240     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1241 #endif
1242     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1243     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1244 #else /* on missing GESVD */
1245     /* SVD */
1246     PetscInt max_n,min_n;
1247     max_n = max_size_of_constraint;
1248     min_n = max_constraints;
1249     if (max_size_of_constraint < max_constraints) {
1250       min_n = max_size_of_constraint;
1251       max_n = max_constraints;
1252     }
1253     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_COMPLEX)
1255     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1256 #endif
1257     /* now we evaluate the optimal workspace using query with lwork=-1 */
1258     lwork = -1;
1259     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1260     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1261     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1262     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1263 #if !defined(PETSC_USE_COMPLEX)
1264     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1265 #else
1266     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1267 #endif
1268     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1269     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1270 #endif /* on missing GESVD */
1271     /* Allocate optimal workspace */
1272     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1273     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1274   }
1275   /* Now we can loop on constraining sets */
1276   total_counts = 0;
1277   temp_indices[0] = 0;
1278   /* vertices */
1279   if (ISForVertices) {
1280     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1281     if (nnsp_has_cnst) { /* consider all vertices */
1282       for (i=0;i<n_vertices;i++) {
1283         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1284         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1285         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1286         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1287         total_counts++;
1288       }
1289     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1290       PetscBool used_vertex;
1291       for (i=0;i<n_vertices;i++) {
1292         used_vertex = PETSC_FALSE;
1293         k = 0;
1294         while (!used_vertex && k<nnsp_size) {
1295           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1296           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1297             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1298             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1299             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1300             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1301             total_counts++;
1302             used_vertex = PETSC_TRUE;
1303           }
1304           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1305           k++;
1306         }
1307       }
1308     }
1309     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1310     n_vertices = total_counts;
1311   }
1312 
1313   /* edges and faces */
1314   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1315     if (i<n_ISForEdges) {
1316       used_IS = &ISForEdges[i];
1317       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1318     } else {
1319       used_IS = &ISForFaces[i-n_ISForEdges];
1320       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1321     }
1322     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1323     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1324     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1325     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1326     /* change of basis should not be performed on local periodic nodes */
1327     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1328     if (nnsp_has_cnst) {
1329       PetscScalar quad_value;
1330       temp_constraints++;
1331       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1332       for (j=0;j<size_of_constraint;j++) {
1333         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1334         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1335         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1336       }
1337       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1338       if (boolforchange) {
1339         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1340       }
1341       total_counts++;
1342     }
1343     for (k=0;k<nnsp_size;k++) {
1344       PetscReal real_value;
1345       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1346       for (j=0;j<size_of_constraint;j++) {
1347         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1348         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1349         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1350       }
1351       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1352       /* check if array is null on the connected component */
1353       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1354       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1355       if (real_value > 0.0) { /* keep indices and values */
1356         temp_constraints++;
1357         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1358         if (boolforchange) {
1359           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1360         }
1361         total_counts++;
1362       }
1363     }
1364     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1365     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1366     if (!pcbddc->use_nnsp_true) {
1367       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1368 
1369 #if defined(PETSC_MISSING_LAPACK_GESVD)
1370       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1371          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1372          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1373             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1374             from that computed using LAPACKgesvd
1375          -> This is due to a different computation of eigenvectors in LAPACKheev
1376          -> The quality of the POD-computed basis will be the same */
1377       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1378       /* Store upper triangular part of correlation matrix */
1379       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1380       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1381       for (j=0;j<temp_constraints;j++) {
1382         for (k=0;k<j+1;k++) {
1383           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1384         }
1385       }
1386       /* compute eigenvalues and eigenvectors of correlation matrix */
1387       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1388       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1389 #if !defined(PETSC_USE_COMPLEX)
1390       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1391 #else
1392       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1393 #endif
1394       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1395       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1396       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1397       j=0;
1398       while (j < temp_constraints && singular_vals[j] < tol) j++;
1399       total_counts=total_counts-j;
1400       /* scale and copy POD basis into used quadrature memory */
1401       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1402       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1403       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1404       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1405       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1406       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1407       if (j<temp_constraints) {
1408         PetscInt ii;
1409         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1410         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1411         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1412         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1413         for (k=0;k<temp_constraints-j;k++) {
1414           for (ii=0;ii<size_of_constraint;ii++) {
1415             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1416           }
1417         }
1418       }
1419 #else  /* on missing GESVD */
1420       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1421       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1422       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1423       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1424 #if !defined(PETSC_USE_COMPLEX)
1425       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1426 #else
1427       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1428 #endif
1429       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1430       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1431       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1432       k = temp_constraints;
1433       if (k > size_of_constraint) k = size_of_constraint;
1434       j = 0;
1435       while (j < k && singular_vals[k-j-1] < tol) j++;
1436       total_counts = total_counts-temp_constraints+k-j;
1437 #endif /* on missing GESVD */
1438     }
1439   }
1440   /* free index sets of faces, edges and vertices */
1441   for (i=0;i<n_ISForFaces;i++) {
1442     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1443   }
1444   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1445   for (i=0;i<n_ISForEdges;i++) {
1446     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1447   }
1448   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1449   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1450 
1451   /* free workspace */
1452   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1453     ierr = PetscFree(work);CHKERRQ(ierr);
1454 #if defined(PETSC_USE_COMPLEX)
1455     ierr = PetscFree(rwork);CHKERRQ(ierr);
1456 #endif
1457     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1458 #if defined(PETSC_MISSING_LAPACK_GESVD)
1459     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1460     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1461 #endif
1462   }
1463   for (k=0;k<nnsp_size;k++) {
1464     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1465   }
1466   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1467 
1468   /* set quantities in pcbddc data structure */
1469   /* n_vertices defines the number of subdomain corners in the primal space */
1470   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1471   pcbddc->local_primal_size = total_counts;
1472   pcbddc->n_vertices = n_vertices;
1473   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1474 
1475   /* Create constraint matrix */
1476   /* The constraint matrix is used to compute the l2g map of primal dofs */
1477   /* so we need to set it up properly either with or without change of basis */
1478   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1479   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1480   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1481   /* array to compute a local numbering of constraints : vertices first then constraints */
1482   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1483   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1484   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1485   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1486   /* auxiliary stuff for basis change */
1487   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1488   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1489 
1490   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1491   total_primal_vertices=0;
1492   for (i=0;i<pcbddc->local_primal_size;i++) {
1493     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1494     if (size_of_constraint == 1) {
1495       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1496       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1497       aux_primal_minloc[total_primal_vertices]=0;
1498       total_primal_vertices++;
1499     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1500       PetscInt min_loc,min_index;
1501       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1502       /* find first untouched local node */
1503       k = 0;
1504       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1505       min_index = global_indices[k];
1506       min_loc = k;
1507       /* search the minimum among global nodes already untouched on the cc */
1508       for (k=1;k<size_of_constraint;k++) {
1509         /* there can be more than one constraint on a single connected component */
1510         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1511           min_index = global_indices[k];
1512           min_loc = k;
1513         }
1514       }
1515       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1516       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1517       aux_primal_minloc[total_primal_vertices]=min_loc;
1518       total_primal_vertices++;
1519     }
1520   }
1521   /* free workspace */
1522   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1523   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1524   /* permute indices in order to have a sorted set of vertices */
1525   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1526 
1527   /* nonzero structure of constraint matrix */
1528   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1529   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1530   j=total_primal_vertices;
1531   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1532     if (!PetscBTLookup(change_basis,i)) {
1533       nnz[j]=temp_indices[i+1]-temp_indices[i];
1534       j++;
1535     }
1536   }
1537   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1538   ierr = PetscFree(nnz);CHKERRQ(ierr);
1539   /* set values in constraint matrix */
1540   for (i=0;i<total_primal_vertices;i++) {
1541     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1542   }
1543   total_counts = total_primal_vertices;
1544   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1545     if (!PetscBTLookup(change_basis,i)) {
1546       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1547       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1548       total_counts++;
1549     }
1550   }
1551   /* assembling */
1552   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554   /*
1555   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1556   */
1557   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1558   if (pcbddc->use_change_of_basis) {
1559     PetscBool qr_needed = PETSC_FALSE;
1560     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1561     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1562     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1563     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1564     /* work arrays */
1565     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1566     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1567     /* nonzeros per row */
1568     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1569       if (PetscBTLookup(change_basis,i)) {
1570         qr_needed = PETSC_TRUE;
1571         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1572         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1573       }
1574     }
1575     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1576     ierr = PetscFree(nnz);CHKERRQ(ierr);
1577     /* Set initial identity in the matrix */
1578     for (i=0;i<pcis->n_B;i++) {
1579       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1580     }
1581 
1582     /* Now we loop on the constraints which need a change of basis */
1583     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1584     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1585     /* Change of basis matrix T computed via QR decomposition of constraints */
1586     if (qr_needed) {
1587       /* dual and primal dofs on a single cc */
1588       PetscInt     dual_dofs,primal_dofs;
1589       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1590       PetscInt     primal_counter;
1591       /* working stuff for GEQRF */
1592       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1593       PetscBLASInt lqr_work;
1594       /* working stuff for UNGQR */
1595       PetscScalar  *gqr_work,lgqr_work_t;
1596       PetscBLASInt lgqr_work;
1597       /* working stuff for TRTRS */
1598       PetscScalar  *trs_rhs;
1599       PetscBLASInt Blas_NRHS;
1600       /* pointers for values insertion into change of basis matrix */
1601       PetscInt     *start_rows,*start_cols;
1602       PetscScalar  *start_vals;
1603       /* working stuff for values insertion */
1604       PetscBT      is_primal;
1605 
1606       /* space to store Q */
1607       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1608       /* first we issue queries for optimal work */
1609       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1610       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1611       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1612       lqr_work = -1;
1613       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1614       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1615       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1616       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1617       lgqr_work = -1;
1618       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1619       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1620       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1621       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1622       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1623       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1624       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1625       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1626       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1627       /* array to store scaling factors for reflectors */
1628       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1629       /* array to store rhs and solution of triangular solver */
1630       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1631       /* array to store whether a node is primal or not */
1632       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1633       for (i=0;i<total_primal_vertices;i++) {
1634         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1635       }
1636 
1637       /* allocating workspace for check */
1638       if (pcbddc->dbg_flag) {
1639         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1640         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1641         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1642       }
1643 
1644       /* loop on constraints and see whether or not they need a change of basis */
1645       /* -> using implicit ordering contained in temp_indices data */
1646       total_counts = pcbddc->n_vertices;
1647       primal_counter = total_counts;
1648       while (total_counts<pcbddc->local_primal_size) {
1649         primal_dofs = 1;
1650         if (PetscBTLookup(change_basis,total_counts)) {
1651           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1652           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1653             primal_dofs++;
1654           }
1655           /* get constraint info */
1656           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1657           dual_dofs = size_of_constraint-primal_dofs;
1658 
1659           /* copy quadrature constraints for change of basis check */
1660           if (pcbddc->dbg_flag) {
1661             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1662             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1663           }
1664 
1665           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1666           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1667 
1668           /* compute QR decomposition of constraints */
1669           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1670           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1671           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1672           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1673           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1674           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1675           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1676 
1677           /* explictly compute R^-T */
1678           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1679           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1680           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1681           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1682           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1683           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1684           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1685           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1686           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1687           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1688 
1689           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1690           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1691           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1692           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1693           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1694           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1695           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1696           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1697           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1698 
1699           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1700              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1701              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1702           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1703           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1704           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1705           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1706           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1708           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1709           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1710           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1711           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1712 
1713           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1714           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1715           /* insert cols for primal dofs */
1716           for (j=0;j<primal_dofs;j++) {
1717             start_vals = &qr_basis[j*size_of_constraint];
1718             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1719             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1720           }
1721           /* insert cols for dual dofs */
1722           for (j=0,k=0;j<dual_dofs;k++) {
1723             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1724               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1725               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1726               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1727               j++;
1728             }
1729           }
1730 
1731           /* check change of basis */
1732           if (pcbddc->dbg_flag) {
1733             PetscInt   ii,jj;
1734             PetscBool valid_qr=PETSC_TRUE;
1735             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1736             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1737             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1738             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1739             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1740             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1741             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1742             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1743             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1744             for (jj=0;jj<size_of_constraint;jj++) {
1745               for (ii=0;ii<primal_dofs;ii++) {
1746                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1747                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1748               }
1749             }
1750             if (!valid_qr) {
1751               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1752               for (jj=0;jj<size_of_constraint;jj++) {
1753                 for (ii=0;ii<primal_dofs;ii++) {
1754                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1755                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1756                   }
1757                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1758                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1759                   }
1760                 }
1761               }
1762             } else {
1763               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1764             }
1765           }
1766           /* increment primal counter */
1767           primal_counter += primal_dofs;
1768         } else {
1769           if (pcbddc->dbg_flag) {
1770             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1771           }
1772         }
1773         /* increment constraint counter total_counts */
1774         total_counts += primal_dofs;
1775       }
1776       if (pcbddc->dbg_flag) {
1777         ierr = PetscFree(work);CHKERRQ(ierr);
1778       }
1779       /* free workspace */
1780       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1781       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1782       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1783       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1784       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1785       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1786     }
1787     /* assembling */
1788     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790     /*
1791     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1792     */
1793   }
1794   if (pcbddc->dbg_flag) {
1795     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1796   }
1797   /* free workspace */
1798   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1799   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1800   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1801   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1802   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1803   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1804   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1805   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1811 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1812 {
1813   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1814   PC_IS       *pcis = (PC_IS*)pc->data;
1815   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1816   PetscInt    bs,ierr,i,vertex_size;
1817   PetscViewer viewer=pcbddc->dbg_viewer;
1818 
1819   PetscFunctionBegin;
1820   /* Init local Graph struct */
1821   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1822 
1823   /* Check validity of the csr graph passed in by the user */
1824   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1825     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1826   }
1827   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1828   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1829     Mat mat_adj;
1830     const PetscInt *xadj,*adjncy;
1831     PetscBool flg_row=PETSC_TRUE;
1832 
1833     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1834     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1835     if (!flg_row) {
1836       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1837     }
1838     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1839     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1840     if (!flg_row) {
1841       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1842     }
1843     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1844   }
1845 
1846   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1847   vertex_size = 1;
1848   if (!pcbddc->n_ISForDofs) {
1849     IS *custom_ISForDofs;
1850 
1851     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1852     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1853     for (i=0;i<bs;i++) {
1854       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1855     }
1856     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1857     /* remove my references to IS objects */
1858     for (i=0;i<bs;i++) {
1859       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1860     }
1861     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1862   } else { /* mat block size as vertex size (used for elasticity) */
1863     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1864   }
1865 
1866   /* Setup of Graph */
1867   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1868 
1869   /* Graph's connected components analysis */
1870   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1871 
1872   /* print some info to stdout */
1873   if (pcbddc->dbg_flag) {
1874     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1875   }
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1881 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1882 {
1883   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1884   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1885   PetscErrorCode ierr;
1886 
1887   PetscFunctionBegin;
1888   n = 0;
1889   vertices = 0;
1890   if (pcbddc->ConstraintMatrix) {
1891     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1892     for (i=0;i<local_primal_size;i++) {
1893       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1894       if (size_of_constraint == 1) n++;
1895       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1896     }
1897     if (vertices_idx) {
1898       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1899       n = 0;
1900       for (i=0;i<local_primal_size;i++) {
1901         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1902         if (size_of_constraint == 1) {
1903           vertices[n++]=row_cmat_indices[0];
1904         }
1905         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1906       }
1907     }
1908   }
1909   *n_vertices = n;
1910   if (vertices_idx) *vertices_idx = vertices;
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1916 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1917 {
1918   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1919   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1920   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1921   PetscBT        touched;
1922   PetscErrorCode ierr;
1923 
1924     /* This function assumes that the number of local constraints per connected component
1925        is not greater than the number of nodes defined for the connected component
1926        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1927   PetscFunctionBegin;
1928   n = 0;
1929   constraints_index = 0;
1930   if (pcbddc->ConstraintMatrix) {
1931     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1932     max_size_of_constraint = 0;
1933     for (i=0;i<local_primal_size;i++) {
1934       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1935       if (size_of_constraint > 1) {
1936         n++;
1937       }
1938       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1939       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1940     }
1941     if (constraints_idx) {
1942       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1943       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1944       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1945       n = 0;
1946       for (i=0;i<local_primal_size;i++) {
1947         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1948         if (size_of_constraint > 1) {
1949           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1950           /* find first untouched local node */
1951           j = 0;
1952           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1953           min_index = row_cmat_global_indices[j];
1954           min_loc = j;
1955           /* search the minimum among nodes not yet touched on the connected component
1956              since there can be more than one constraint on a single cc */
1957           for (j=1;j<size_of_constraint;j++) {
1958             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1959               min_index = row_cmat_global_indices[j];
1960               min_loc = j;
1961             }
1962           }
1963           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1964           constraints_index[n++] = row_cmat_indices[min_loc];
1965         }
1966         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1967       }
1968       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1969       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1970     }
1971   }
1972   *n_constraints = n;
1973   if (constraints_idx) *constraints_idx = constraints_index;
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 /* the next two functions has been adapted from pcis.c */
1978 #undef __FUNCT__
1979 #define __FUNCT__ "PCBDDCApplySchur"
1980 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1981 {
1982   PetscErrorCode ierr;
1983   PC_IS          *pcis = (PC_IS*)(pc->data);
1984 
1985   PetscFunctionBegin;
1986   if (!vec2_B) { vec2_B = v; }
1987   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
1988   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
1989   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
1990   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
1991   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "PCBDDCApplySchurTranspose"
1997 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1998 {
1999   PetscErrorCode ierr;
2000   PC_IS          *pcis = (PC_IS*)(pc->data);
2001 
2002   PetscFunctionBegin;
2003   if (!vec2_B) { vec2_B = v; }
2004   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2005   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2006   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2007   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2008   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "PCBDDCSubsetNumbering"
2014 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2015 {
2016   Vec            local_vec,global_vec;
2017   IS             seqis,paris;
2018   VecScatter     scatter_ctx;
2019   PetscScalar    *array;
2020   PetscInt       *temp_global_dofs;
2021   PetscScalar    globalsum;
2022   PetscInt       i,j,s;
2023   PetscInt       nlocals,first_index,old_index,max_local;
2024   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2025   PetscMPIInt    *dof_sizes,*dof_displs;
2026   PetscBool      first_found;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   /* mpi buffers */
2031   MPI_Comm_size(comm,&size_prec_comm);
2032   MPI_Comm_rank(comm,&rank_prec_comm);
2033   j = ( !rank_prec_comm ? size_prec_comm : 0);
2034   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2035   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2036   /* get maximum size of subset */
2037   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2038   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2039   max_local = 0;
2040   if (n_local_dofs) {
2041     max_local = temp_global_dofs[0];
2042     for (i=1;i<n_local_dofs;i++) {
2043       if (max_local < temp_global_dofs[i] ) {
2044         max_local = temp_global_dofs[i];
2045       }
2046     }
2047   }
2048   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2049   max_global++;
2050   max_local = 0;
2051   if (n_local_dofs) {
2052     max_local = local_dofs[0];
2053     for (i=1;i<n_local_dofs;i++) {
2054       if (max_local < local_dofs[i] ) {
2055         max_local = local_dofs[i];
2056       }
2057     }
2058   }
2059   max_local++;
2060   /* allocate workspace */
2061   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2062   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2063   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2064   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2065   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2066   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2067   /* create scatter */
2068   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2069   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2070   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2071   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2072   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2073   /* init array */
2074   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2075   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2076   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2077   if (local_dofs_mult) {
2078     for (i=0;i<n_local_dofs;i++) {
2079       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2080     }
2081   } else {
2082     for (i=0;i<n_local_dofs;i++) {
2083       array[local_dofs[i]]=1.0;
2084     }
2085   }
2086   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2087   /* scatter into global vec and get total number of global dofs */
2088   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2089   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2090   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2091   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2092   /* Fill global_vec with cumulative function for global numbering */
2093   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2094   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2095   nlocals = 0;
2096   first_index = -1;
2097   first_found = PETSC_FALSE;
2098   for (i=0;i<s;i++) {
2099     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2100       first_found = PETSC_TRUE;
2101       first_index = i;
2102     }
2103     nlocals += (PetscInt)PetscRealPart(array[i]);
2104   }
2105   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2106   if (!rank_prec_comm) {
2107     dof_displs[0]=0;
2108     for (i=1;i<size_prec_comm;i++) {
2109       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2110     }
2111   }
2112   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2113   if (first_found) {
2114     array[first_index] += (PetscScalar)nlocals;
2115     old_index = first_index;
2116     for (i=first_index+1;i<s;i++) {
2117       if (PetscRealPart(array[i]) > 0.0) {
2118         array[i] += array[old_index];
2119         old_index = i;
2120       }
2121     }
2122   }
2123   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2124   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2125   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2126   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2127   /* get global ordering of local dofs */
2128   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2129   if (local_dofs_mult) {
2130     for (i=0;i<n_local_dofs;i++) {
2131       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2132     }
2133   } else {
2134     for (i=0;i<n_local_dofs;i++) {
2135       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2136     }
2137   }
2138   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2139   /* free workspace */
2140   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2141   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2142   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2143   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2144   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2145   /* return pointer to global ordering of local dofs */
2146   *global_numbering_subset = temp_global_dofs;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2152 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2153 {
2154   PetscInt       i,j;
2155   PetscScalar    *alphas;
2156   PetscErrorCode ierr;
2157 
2158   PetscFunctionBegin;
2159   /* this implements stabilized Gram-Schmidt */
2160   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2161   for (i=0;i<n;i++) {
2162     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2163     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2164     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2165   }
2166   ierr = PetscFree(alphas);CHKERRQ(ierr);
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatConvert_IS_AIJ"
2173 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2174 {
2175   Mat new_mat;
2176   Mat_IS *matis = (Mat_IS*)(mat->data);
2177   /* info on mat */
2178   PetscInt bs,rows,cols;
2179   PetscInt lrows,lcols;
2180   PetscInt local_rows,local_cols;
2181   PetscMPIInt nsubdomains;
2182   /* preallocation */
2183   Vec vec_dnz,vec_onz;
2184   PetscScalar *my_dnz,*my_onz,*array;
2185   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2186   PetscInt  index_row,index_col,owner;
2187   PetscInt  *local_indices,*global_indices;
2188   /* work */
2189   PetscInt i,j;
2190   PetscErrorCode ierr;
2191 
2192   PetscFunctionBegin;
2193   /* CHECKS */
2194   /* get info from mat */
2195   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2196   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2197   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2199 
2200   /* MAT_INITIAL_MATRIX */
2201   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2202   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2203   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2204   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2205   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2207 
2208   /*
2209     preallocation
2210   */
2211 
2212   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2213   /*
2214      Some vectors are needed to sum up properly on shared interface dofs.
2215      Note that preallocation is not exact, since it overestimates nonzeros
2216   */
2217 /*
2218   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2219   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2220   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2221   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2222 */
2223   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2224   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2225   /* All processes need to compute entire row ownership */
2226   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2227   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2228   for (i=0;i<nsubdomains;i++) {
2229     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2230       row_ownership[j]=i;
2231     }
2232   }
2233   /* map indices of local mat to global values */
2234   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2235   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2236   for (i=0;i<local_rows;i++) local_indices[i]=i;
2237   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2238   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2239 
2240   /*
2241      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2242      then, they will be summed up properly. This way, preallocation is always sufficient
2243   */
2244   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2245   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2246   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2247   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2248   for (i=0;i<local_rows;i++) {
2249     index_row = global_indices[i];
2250     for (j=i;j<local_rows;j++) {
2251       owner = row_ownership[index_row];
2252       index_col = global_indices[j];
2253       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2254         my_dnz[i] += 1.0;
2255       } else { /* offdiag block */
2256         my_onz[i] += 1.0;
2257       }
2258       /* same as before, interchanging rows and cols */
2259       if (i != j) {
2260         owner = row_ownership[index_col];
2261         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2262           my_dnz[j] += 1.0;
2263         } else {
2264           my_onz[j] += 1.0;
2265         }
2266       }
2267     }
2268   }
2269   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2270   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2271   if (local_rows) { /* multilevel guard */
2272     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2273     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2274   }
2275   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2276   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2277   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2278   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2279   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2280   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2281   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2282 
2283   /* set computed preallocation in dnz and onz */
2284   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2285   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2286   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2287   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2288   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2289   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2290   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2291   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2292 
2293   /* Resize preallocation if overestimated */
2294   for (i=0;i<lrows;i++) {
2295     dnz[i] = PetscMin(dnz[i],lcols);
2296     onz[i] = PetscMin(onz[i],cols-lcols);
2297   }
2298   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2299   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2300 
2301   /*
2302     Set values. Very Basic.
2303   */
2304   for (i=0;i<local_rows;i++) {
2305     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2306     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2307     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2308     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2309     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2310   }
2311   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2313   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2314 
2315   /* get back mat */
2316   *M = new_mat;
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatISSubassemble_Private"
2322 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2323 {
2324   Mat             subdomain_adj;
2325   IS              new_ranks,ranks_send_to;
2326   MatPartitioning partitioner;
2327   Mat_IS          *matis;
2328   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2329   PetscInt        prank;
2330   PetscMPIInt     size,rank,color;
2331   PetscInt        *xadj,*adjncy,*oldranks;
2332   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2333   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2334   PetscErrorCode  ierr;
2335   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2336   PetscSubcomm    subcomm;
2337 
2338   PetscFunctionBegin;
2339   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2340   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2341   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2342 
2343   /* Get info on mapping */
2344   matis = (Mat_IS*)(mat->data);
2345   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2346   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2347 
2348   /* build local CSR graph of subdomains' connectivity */
2349   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2350   xadj[0] = 0;
2351   xadj[1] = PetscMax(n_neighs-1,0);
2352   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2353   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2354 
2355   if (threshold) {
2356     PetscInt* count,min_threshold;
2357     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2358     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2359     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2360       for (j=0;j<n_shared[i];j++) {
2361         count[shared[i][j]] += 1;
2362       }
2363     }
2364     /* adapt threshold since we dont want to lose significant connections */
2365     min_threshold = n_neighs;
2366     for (i=1;i<n_neighs;i++) {
2367       for (j=0;j<n_shared[i];j++) {
2368         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2369       }
2370     }
2371     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2372     threshold = PetscMax(min_threshold+1,threshold);
2373     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2374     xadj[1] = 0;
2375     for (i=1;i<n_neighs;i++) {
2376       for (j=0;j<n_shared[i];j++) {
2377         if (count[shared[i][j]] < threshold) {
2378           adjncy[xadj[1]] = neighs[i];
2379           adjncy_wgt[xadj[1]] = n_shared[i];
2380           xadj[1]++;
2381           break;
2382         }
2383       }
2384     }
2385     ierr = PetscFree(count);CHKERRQ(ierr);
2386   } else {
2387     if (xadj[1]) {
2388       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2389       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2390     }
2391   }
2392   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2393   if (use_square) {
2394     for (i=0;i<xadj[1];i++) {
2395       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2396     }
2397   }
2398   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2399 
2400   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2401 
2402   /*
2403     Restrict work on active processes only.
2404   */
2405   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2406   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2407   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2408   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2409   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2410   if (color) {
2411     ierr = PetscFree(xadj);CHKERRQ(ierr);
2412     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2413     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2414   } else {
2415     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2416     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2417     prank = rank;
2418     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2419     for (i=0;i<size;i++) {
2420       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2421     }
2422     for (i=0;i<xadj[1];i++) {
2423       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2424     }
2425     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2426     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2427     n_subdomains = (PetscInt)size;
2428     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2429 
2430     /* Partition */
2431     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2432     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2433     if (use_vwgt) {
2434       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2435       v_wgt[0] = local_size;
2436       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2437     }
2438     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2439     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2440     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2441     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2442     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2443 
2444     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2445     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2446     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2447     /* clean up */
2448     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2449     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2450     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2451     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2452   }
2453   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2454 
2455   /* assemble parallel IS for sends */
2456   i = 1;
2457   if (color) i=0;
2458   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2459   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2460 
2461   /* get back IS */
2462   *is_sends = ranks_send_to;
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatISSubassemble"
2470 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2471 {
2472   Mat                    local_mat,new_mat;
2473   Mat_IS                 *matis;
2474   IS                     is_sends_internal;
2475   PetscInt               rows,cols;
2476   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2477   PetscBool              ismatis,isdense;
2478   ISLocalToGlobalMapping l2gmap;
2479   PetscInt*              l2gmap_indices;
2480   const PetscInt*        is_indices;
2481   MatType                new_local_type;
2482   MatTypePrivate         new_local_type_private;
2483   /* buffers */
2484   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2485   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2486   /* MPI */
2487   MPI_Comm               comm;
2488   PetscMPIInt            n_sends,n_recvs,commsize;
2489   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2490   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2491   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2492   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2493   PetscErrorCode         ierr;
2494 
2495   PetscFunctionBegin;
2496   /* checks */
2497   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2498   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2499   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2500   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2501   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2502   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2503   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2504   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2505   PetscValidLogicalCollectiveInt(mat,bs,0);
2506   /* prepare IS for sending if not provided */
2507   if (!is_sends) {
2508     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2509     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2510   } else {
2511     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2512     is_sends_internal = is_sends;
2513   }
2514 
2515   /* get pointer of MATIS data */
2516   matis = (Mat_IS*)mat->data;
2517 
2518   /* get comm */
2519   comm = PetscObjectComm((PetscObject)mat);
2520 
2521   /* compute number of sends */
2522   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2523   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2524 
2525   /* compute number of receives */
2526   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2527   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2528   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2529   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2530   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2531   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2532   ierr = PetscFree(iflags);CHKERRQ(ierr);
2533 
2534   /* prepare send/receive buffers */
2535   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2536   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2537   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2538   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2539 
2540   /* Get data from local mat */
2541   if (!isdense) {
2542     /* TODO: See below some guidelines on how to prepare the local buffers */
2543     /*
2544        send_buffer_vals should contain the raw values of the local matrix
2545        send_buffer_idxs should contain:
2546        - MatType_PRIVATE type
2547        - PetscInt        size_of_l2gmap
2548        - PetscInt        global_row_indices[size_of_l2gmap]
2549        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2550     */
2551   } else {
2552     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2553     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2554     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2555     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2556     send_buffer_idxs[1] = i;
2557     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2558     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2559     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2560     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2561     for (i=0;i<n_sends;i++) {
2562       ilengths_vals[is_indices[i]] = len*len;
2563       ilengths_idxs[is_indices[i]] = len+2;
2564     }
2565   }
2566   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2567   buf_size_idxs = 0;
2568   buf_size_vals = 0;
2569   for (i=0;i<n_recvs;i++) {
2570     buf_size_idxs += (PetscInt)olengths_idxs[i];
2571     buf_size_vals += (PetscInt)olengths_vals[i];
2572   }
2573   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2574   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2575 
2576   /* get new tags for clean communications */
2577   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2578   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2579 
2580   /* allocate for requests */
2581   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2582   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2583   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2584   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2585 
2586   /* communications */
2587   ptr_idxs = recv_buffer_idxs;
2588   ptr_vals = recv_buffer_vals;
2589   for (i=0;i<n_recvs;i++) {
2590     source_dest = onodes[i];
2591     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2592     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2593     ptr_idxs += olengths_idxs[i];
2594     ptr_vals += olengths_vals[i];
2595   }
2596   for (i=0;i<n_sends;i++) {
2597     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2598     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2599     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2600   }
2601   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2602   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2603 
2604   /* assemble new l2g map */
2605   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2606   ptr_idxs = recv_buffer_idxs;
2607   buf_size_idxs = 0;
2608   for (i=0;i<n_recvs;i++) {
2609     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2610     ptr_idxs += olengths_idxs[i];
2611   }
2612   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2613   ptr_idxs = recv_buffer_idxs;
2614   buf_size_idxs = 0;
2615   for (i=0;i<n_recvs;i++) {
2616     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2617     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2618     ptr_idxs += olengths_idxs[i];
2619   }
2620   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2621   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2622   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2623 
2624   /* infer new local matrix type from received local matrices type */
2625   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2626   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2627   new_local_type_private = MATAIJ_PRIVATE;
2628   new_local_type = MATSEQAIJ;
2629   if (n_recvs) {
2630     new_local_type_private = send_buffer_idxs[0];
2631     ptr_idxs = recv_buffer_idxs;
2632     for (i=0;i<n_recvs;i++) {
2633       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2634         new_local_type_private = MATAIJ_PRIVATE;
2635         break;
2636       }
2637       ptr_idxs += olengths_idxs[i];
2638     }
2639     switch (new_local_type_private) {
2640       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2641         new_local_type = MATSEQAIJ;
2642         bs = 1;
2643         break;
2644       case MATAIJ_PRIVATE:
2645         new_local_type = MATSEQAIJ;
2646         bs = 1;
2647         break;
2648       case MATBAIJ_PRIVATE:
2649         new_local_type = MATSEQBAIJ;
2650         break;
2651       case MATSBAIJ_PRIVATE:
2652         new_local_type = MATSEQSBAIJ;
2653         break;
2654       default:
2655         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2656         break;
2657     }
2658   }
2659 
2660   /* create MATIS object */
2661   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2662   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2663   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2664   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2665   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2666   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2667 
2668   /* set values */
2669   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2670   ptr_vals = recv_buffer_vals;
2671   ptr_idxs = recv_buffer_idxs;
2672   for (i=0;i<n_recvs;i++) {
2673     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2674       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2675       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2676       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2677       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2678       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2679     }
2680     ptr_idxs += olengths_idxs[i];
2681     ptr_vals += olengths_vals[i];
2682   }
2683   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2687 
2688   { /* check */
2689     Vec       lvec,rvec;
2690     PetscReal infty_error;
2691 
2692     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2693     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2694     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2695     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2696     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2697     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2698     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2699     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2700     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2701   }
2702 
2703   /* free workspace */
2704   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2705   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2706   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2707   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2708   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2709   if (isdense) {
2710     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2711     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2712   } else {
2713     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2714   }
2715   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2716   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2717   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2718   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2719   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2720   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2721   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2722   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2723   ierr = PetscFree(onodes);CHKERRQ(ierr);
2724   /* get back new mat */
2725   *mat_n = new_mat;
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2731 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2732 {
2733   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2734   PC_IS                  *pcis = (PC_IS*)pc->data;
2735   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2736   MatNullSpace           CoarseNullSpace=NULL;
2737   ISLocalToGlobalMapping coarse_islg;
2738   IS                     coarse_is;
2739   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2740   PetscInt               im_active=-1,active_procs=-1;
2741   PC                     pc_temp;
2742   PCType                 coarse_pc_type;
2743   KSPType                coarse_ksp_type;
2744   PetscBool              multilevel_requested,multilevel_allowed;
2745   PetscBool              setsym,issym,isbddc,isnn;
2746   MatStructure           matstruct;
2747   PetscErrorCode         ierr;
2748 
2749   PetscFunctionBegin;
2750   /* Assign global numbering to coarse dofs */
2751   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2752 
2753   /* infer some info from user */
2754   issym = PETSC_FALSE;
2755   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2756   multilevel_allowed = PETSC_FALSE;
2757   multilevel_requested = PETSC_FALSE;
2758   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2759   if (multilevel_requested) {
2760     /* count "active processes" */
2761     im_active = !!(pcis->n);
2762     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2763     if (active_procs/pcbddc->coarsening_ratio < 2) {
2764       multilevel_allowed = PETSC_FALSE;
2765     } else {
2766       multilevel_allowed = PETSC_TRUE;
2767     }
2768   }
2769 
2770   /* set defaults for coarse KSP and PC */
2771   if (multilevel_allowed) {
2772     if (issym) {
2773       coarse_ksp_type = KSPRICHARDSON;
2774     } else {
2775       coarse_ksp_type = KSPCHEBYSHEV;
2776     }
2777     coarse_pc_type = PCBDDC;
2778   } else {
2779     coarse_ksp_type = KSPPREONLY;
2780     coarse_pc_type = PCREDUNDANT;
2781   }
2782 
2783   /* create the coarse KSP object only once with defaults */
2784   if (!pcbddc->coarse_ksp) {
2785     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2786     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2787     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2788     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2789     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2790     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2791     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2792   }
2793   /* allow user customization */
2794   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2795 
2796   /* get some info after set from options */
2797   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2798   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2799   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2800   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2801     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2802     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2803     isbddc = PETSC_FALSE;
2804   }
2805 
2806   /* propagate BDDC info to the next level */
2807   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2808   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2809   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2810 
2811   /* creates temporary MATIS object for coarse matrix */
2812   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2813   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2814   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2815   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2816   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2817   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2820   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2821   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2822 
2823   /* assemble coarse matrix */
2824   if (isbddc || isnn) {
2825     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2826   } else {
2827     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2828   }
2829   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2830 
2831   /* create local to global scatters for coarse problem */
2832   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2833   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2834   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2835 
2836   /* propagate symmetry info to coarse matrix */
2837   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2838 
2839   /* Compute coarse null space (special handling by BDDC only) */
2840   if (pcbddc->NullSpace) {
2841     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2842     if (isbddc) {
2843       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2844     } else {
2845       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2846     }
2847   }
2848 
2849   /* set operators */
2850   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2851   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2852 
2853   /* additional KSP customization */
2854   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2855   if (max_it < 5) {
2856     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2857   }
2858   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
2859 
2860 
2861   /* print some info if requested */
2862   if (pcbddc->dbg_flag) {
2863     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
2864     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
2865     if (!multilevel_allowed) {
2866       if (multilevel_requested) {
2867         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2868       } else if (pcbddc->max_levels) {
2869         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
2870       }
2871     }
2872     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level);CHKERRQ(ierr);
2873     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2874   }
2875 
2876   /* setup coarse ksp */
2877   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2878   if (pcbddc->dbg_flag) {
2879     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
2880     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2881     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
2882     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2883   }
2884 
2885   /* HACK: TODO see if it could be avoided */
2886   ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2887 
2888   /* Check coarse problem if requested */
2889   if (pcbddc->dbg_flag) {
2890     KSP       check_ksp;
2891     KSPType   check_ksp_type;
2892     PC        check_pc;
2893     Vec       check_vec;
2894     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
2895     PetscInt  its;
2896     PetscBool ispreonly,compute;
2897 
2898     /* Create ksp object suitable for estimation of extreme eigenvalues */
2899     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
2900     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2901     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2902     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
2903     if (ispreonly) {
2904       check_ksp_type = KSPPREONLY;
2905       compute = PETSC_FALSE;
2906     } else {
2907       if (issym) check_ksp_type = KSPCG;
2908       else check_ksp_type = KSPGMRES;
2909       compute = PETSC_TRUE;
2910     }
2911     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2912     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
2913     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2914     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2915     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2916     /* create random vec */
2917     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2918     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2919     if (CoarseNullSpace) {
2920       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2921     }
2922     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2923     /* solve coarse problem */
2924     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2925     if (CoarseNullSpace) {
2926       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2927     }
2928     /* check coarse problem residual error */
2929     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2930     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2931     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2932     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2933     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2934     /* get eigenvalue estimation if preonly has not been requested */
2935     if (!ispreonly) {
2936       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
2937       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
2938       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
2939     }
2940     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
2941     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
2942     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2943     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
2944   }
2945   /* free memory */
2946   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
2947   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
2953 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
2954 {
2955   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2956   PC_IS*         pcis = (PC_IS*)pc->data;
2957   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
2958   PetscInt       i,j,coarse_size;
2959   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   /* get indices in local ordering for vertices and constraints */
2964   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2965   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2966   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2967   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2968   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2969   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2970   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2971 
2972   /* Compute global number of coarse dofs */
2973   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2974 
2975   /* check numbering */
2976   if (pcbddc->dbg_flag) {
2977     PetscScalar coarsesum,*array;
2978 
2979     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2980     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2981     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
2982     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2983     for (i=0;i<pcbddc->local_primal_size;i++) {
2984       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2985     }
2986     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
2987     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
2988     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2989     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2990     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2991     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2992     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2993     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2994     for (i=0;i<pcis->n;i++) {
2995       if (array[i] == 1.0) {
2996         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
2997       }
2998     }
2999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3000     for (i=0;i<pcis->n;i++) {
3001       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3002     }
3003     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3004     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3005     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3006     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3007     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3008     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3009     if (pcbddc->dbg_flag > 1) {
3010       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3011       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3012       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3013       for (i=0;i<pcbddc->local_primal_size;i++) {
3014         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3015       }
3016       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3017     }
3018     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3019   }
3020   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3021   /* get back data */
3022   *coarse_size_n = coarse_size;
3023   *local_primal_indices_n = local_primal_indices;
3024   PetscFunctionReturn(0);
3025 }
3026 
3027