xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 863295e034b4a80f8420614179e8b167eecec94e)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
106   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
108   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
110   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
113   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
117   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
118   ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
119   ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
120   ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
121   ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
122   ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 #undef __FUNCT__
127 #define __FUNCT__ "PCBDDCCreateWorkVectors"
128 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
129 {
130   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
131   PC_IS          *pcis = (PC_IS*)pc->data;
132   VecType        impVecType;
133   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
134   PetscErrorCode ierr;
135 
136   PetscFunctionBegin;
137   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
138   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
139   local_primal_size = n_constraints+n_vertices;
140   n_R = pcis->n-n_vertices;
141   /* local work vectors */
142   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
143   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
144   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
145   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
146   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
147   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
148   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
149   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
150   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
151   if (n_constraints) {
152     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
153     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
154     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
161 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
162 {
163   PetscErrorCode         ierr;
164   /* pointers to pcis and pcbddc */
165   PC_IS*                 pcis = (PC_IS*)pc->data;
166   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
167   /* submatrices of local problem */
168   Mat                    A_RV,A_VR,A_VV;
169   /* working matrices */
170   Mat                    M1,M2,M3,C_CR;
171   /* working vectors */
172   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
173   /* additional working stuff */
174   IS                     is_aux;
175   ISLocalToGlobalMapping BtoNmap;
176   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
177   const PetscScalar      *array,*row_cmat_values;
178   const PetscInt         *row_cmat_indices,*idx_R_local;
179   PetscInt               *vertices,*idx_V_B,*auxindices;
180   PetscInt               n_vertices,n_constraints,size_of_constraint;
181   PetscInt               i,j,n_R,n_D,n_B;
182   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
183   /* Vector and matrix types */
184   VecType                impVecType;
185   MatType                impMatType;
186   /* some shortcuts to scalars */
187   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
188   /* for debugging purposes */
189   PetscReal              *coarsefunctions_errors,*constraints_errors;
190 
191   PetscFunctionBegin;
192   /* get number of vertices and their local indices */
193   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
194   n_constraints = pcbddc->local_primal_size-n_vertices;
195   /* Set Non-overlapping dimensions */
196   n_B = pcis->n_B; n_D = pcis->n - n_B;
197   n_R = pcis->n-n_vertices;
198 
199   /* Set types for local objects needed by BDDC precondtioner */
200   impMatType = MATSEQDENSE;
201   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
202 
203   /* Allocating some extra storage just to be safe */
204   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
205   for (i=0;i<pcis->n;i++) auxindices[i]=i;
206 
207   /* vertices in boundary numbering */
208   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
209   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
210   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
211   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
212   if (i != n_vertices) {
213     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
214   }
215 
216   /* some work vectors on vertices and/or constraints */
217   if (n_vertices) {
218     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
219     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
220     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
221     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
222   }
223   if (n_constraints) {
224     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
225     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
226   }
227 
228   /* Precompute stuffs needed for preprocessing and application of BDDC*/
229   if (n_constraints) {
230     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
231     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
232     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
233     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
234 
235     /* Extract constraints on R nodes: C_{CR}  */
236     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
237     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
238     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
239 
240     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
241     for (i=0;i<n_constraints;i++) {
242       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
243       /* Get row of constraint matrix in R numbering */
244       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
245       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
246       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
247       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
248       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
249       /* Solve for row of constraint matrix in R numbering */
250       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
251       /* Set values in local_auxmat2 */
252       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
253       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
254       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
255     }
256     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
259 
260     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
261     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
262     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
263     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
264     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
265     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
266     ierr = MatSetUp(M1);CHKERRQ(ierr);
267     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
268     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
269     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
270     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
271     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
272     ierr = MatDestroy(&M2);CHKERRQ(ierr);
273     ierr = MatDestroy(&M3);CHKERRQ(ierr);
274     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
275     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
276   }
277 
278   /* Get submatrices from subdomain matrix */
279   if (n_vertices) {
280     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
281     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
282     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
283     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
284     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
285   }
286 
287   /* Matrix of coarse basis functions (local) */
288   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
289   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
290   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
291   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
292   if (pcbddc->switch_static || pcbddc->dbg_flag) {
293     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
294     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
295     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
296     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
297   }
298 
299   if (pcbddc->dbg_flag) {
300     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
301     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
303   }
304   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
305   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
306 
307   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
308 
309   /* vertices */
310   for (i=0;i<n_vertices;i++) {
311     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
312     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
313     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
314     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
315     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
316     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
317     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
318     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
319     if (n_constraints) {
320       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
321       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
322       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
323     }
324     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
325     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
326 
327     /* Set values in coarse basis function and subdomain part of coarse_mat */
328     /* coarse basis functions */
329     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
330     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
331     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
333     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
334     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
335     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
336     if (pcbddc->switch_static || pcbddc->dbg_flag) {
337       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
338       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
340       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
342     }
343     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
344     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
345     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
346     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
347     if (n_constraints) {
348       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
349       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
350       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
351     }
352 
353     /* check */
354     if (pcbddc->dbg_flag) {
355       /* assemble subdomain vector on local nodes */
356       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
357       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
358       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
359       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
360       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
362       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
363       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
364       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
365       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
366       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
367       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
368       if (n_constraints) {
369         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
370         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
371         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
372       }
373       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
374       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
376       /* check saddle point solution */
377       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
378       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
379       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
380       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
381       /* shift by the identity matrix */
382       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
383       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
384       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
385       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
386     }
387   }
388 
389   /* constraints */
390   for (i=0;i<n_constraints;i++) {
391     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
392     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
393     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
394     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
395     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
396     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
397     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
398     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
399     if (n_vertices) {
400       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
401     }
402     /* Set values in coarse basis function and subdomain part of coarse_mat */
403     /* coarse basis functions */
404     j = i+n_vertices; /* don't touch this! */
405     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
406     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
409     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
411     if (pcbddc->switch_static || pcbddc->dbg_flag) {
412       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
414       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
415       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
417     }
418     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
419     if (n_vertices) {
420       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
421       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
423     }
424     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
425     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
426     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
427 
428     if (pcbddc->dbg_flag) {
429       /* assemble subdomain vector on nodes */
430       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
431       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
432       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
433       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
434       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
435       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
436       /* assemble subdomain vector of lagrange multipliers */
437       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
438       if (n_vertices) {
439         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
442       }
443       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
444       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
445       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
449       /* check saddle point solution */
450       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
451       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
452       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
453       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
454       /* shift by the identity matrix */
455       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
456       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
457       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
458       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
459     }
460   }
461   /* call assembling routines for local coarse basis */
462   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
463   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
464   if (pcbddc->switch_static || pcbddc->dbg_flag) {
465     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   }
468 
469   /* compute other basis functions for non-symmetric problems */
470   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
471   if (!setsym || (setsym && !issym)) {
472     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
473     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
474     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
475     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     if (pcbddc->switch_static || pcbddc->dbg_flag) {
477       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
478       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
479       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
480       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
481     }
482     for (i=0;i<pcbddc->local_primal_size;i++) {
483       if (n_constraints) {
484         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
485         for (j=0;j<n_constraints;j++) {
486           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
487         }
488         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
490       }
491       if (i<n_vertices) {
492         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
493         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
494         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
495         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
496         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
497         if (n_constraints) {
498           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
499         }
500       } else {
501         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
502       }
503       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
504       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
505       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
508       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
510       if (i<n_vertices) {
511         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
512       }
513       if (pcbddc->switch_static || pcbddc->dbg_flag) {
514         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
515         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
516         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
517         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
518         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
519       }
520 
521       if (pcbddc->dbg_flag) {
522         /* assemble subdomain vector on nodes */
523         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
524         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
525         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
526         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
527         if (i<n_vertices) {
528           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
532         /* assemble subdomain vector of lagrange multipliers */
533         for (j=0;j<pcbddc->local_primal_size;j++) {
534           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
535         }
536         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
537         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
538         /* check saddle point solution */
539         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
540         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
541         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
543         /* shift by the identity matrix */
544         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
545         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
546         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
547         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
548       }
549     }
550     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     if (pcbddc->switch_static || pcbddc->dbg_flag) {
553       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     }
556   }
557   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
558   /* Checking coarse_sub_mat and coarse basis functios */
559   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
560   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
561   if (pcbddc->dbg_flag) {
562     Mat         coarse_sub_mat;
563     Mat         AUXMAT,TM1,TM2,TM3,TM4;
564     Mat         coarse_phi_D,coarse_phi_B;
565     Mat         coarse_psi_D,coarse_psi_B;
566     Mat         A_II,A_BB,A_IB,A_BI;
567     MatType     checkmattype=MATSEQAIJ;
568     PetscReal   real_value;
569 
570     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
571     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
572     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
573     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
574     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
575     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
576     if (pcbddc->coarse_psi_B) {
577       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
578       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
579     }
580     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
581 
582     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
584     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
585     if (pcbddc->coarse_psi_B) {
586       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
593       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
594       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
595       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598     } else {
599       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
601       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607     }
608     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
609     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
610     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
611     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
612     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
613     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
614     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
616     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
617     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
618     for (i=0;i<pcbddc->local_primal_size;i++) {
619       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
620     }
621     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
622     for (i=0;i<pcbddc->local_primal_size;i++) {
623       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
624     }
625     if (pcbddc->coarse_psi_B) {
626       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
627       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
628         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
629       }
630       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
631       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
632         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
633       }
634     }
635     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
636     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
637     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
638     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
639     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
640     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
641     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
642     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
643     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
644     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
645     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
646     if (pcbddc->coarse_psi_B) {
647       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
648       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
649     }
650     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
651     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
652     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
653     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
654   }
655   /* free memory */
656   if (n_vertices) {
657     ierr = PetscFree(vertices);CHKERRQ(ierr);
658     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
659     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
660     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
661     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
662     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
663   }
664   if (n_constraints) {
665     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
666     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
667     ierr = MatDestroy(&M1);CHKERRQ(ierr);
668     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
669   }
670   ierr = PetscFree(auxindices);CHKERRQ(ierr);
671   /* get back data */
672   *coarse_submat_vals_n = coarse_submat_vals;
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
678 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
679 {
680   PC_IS*            pcis = (PC_IS*)(pc->data);
681   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
682   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
683   /* manage repeated solves */
684   MatReuse          reuse;
685   MatStructure      matstruct;
686   PetscErrorCode    ierr;
687 
688   PetscFunctionBegin;
689   /* get mat flags */
690   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
691   reuse = MAT_INITIAL_MATRIX;
692   if (pc->setupcalled) {
693     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
694     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
695     if (matstruct == SAME_NONZERO_PATTERN) {
696       reuse = MAT_REUSE_MATRIX;
697     } else {
698       reuse = MAT_INITIAL_MATRIX;
699     }
700   }
701   if (reuse == MAT_INITIAL_MATRIX) {
702     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
703     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
704     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
705     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
706     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
707   }
708 
709   /* transform local matrices if needed */
710   if (pcbddc->use_change_of_basis) {
711     Mat         change_mat_all;
712     PetscScalar *row_cmat_values;
713     PetscInt    *row_cmat_indices;
714     PetscInt    *nnz,*is_indices,*temp_indices;
715     PetscInt    i,j,k,n_D,n_B;
716 
717     /* Get Non-overlapping dimensions */
718     n_B = pcis->n_B;
719     n_D = pcis->n-n_B;
720 
721     /* compute nonzero structure of change of basis on all local nodes */
722     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
725     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
727     k=1;
728     for (i=0;i<n_B;i++) {
729       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
730       nnz[is_indices[i]]=j;
731       if (k < j) k = j;
732       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
733     }
734     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
735     /* assemble change of basis matrix on the whole set of local dofs */
736     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
737     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
738     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
739     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
740     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
741     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
742     for (i=0;i<n_D;i++) {
743       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
744     }
745     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
746     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
747     for (i=0;i<n_B;i++) {
748       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
749       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
750       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
751       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
752     }
753     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
754     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
755     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
756     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
757     if (i==1) {
758       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
759     } else {
760       Mat work_mat;
761       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
762       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
763       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
764     }
765     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
766     ierr = PetscFree(nnz);CHKERRQ(ierr);
767     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
768   } else {
769     /* without change of basis, the local matrix is unchanged */
770     if (!pcbddc->local_mat) {
771       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
772       pcbddc->local_mat = matis->A;
773     }
774   }
775 
776   /* get submatrices */
777   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
778   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
779   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
780   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
786 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
787 {
788   PC_IS*         pcis = (PC_IS*)(pc->data);
789   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
790   IS             is_aux1,is_aux2;
791   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
792   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
793   PetscBT        bitmask;
794   PetscErrorCode ierr;
795 
796   PetscFunctionBegin;
797   /* Set Non-overlapping dimensions */
798   n_B = pcis->n_B; n_D = pcis->n - n_B;
799   /* get vertex indices from constraint matrix */
800   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
801   /* Set number of constraints */
802   n_constraints = pcbddc->local_primal_size-n_vertices;
803   /* create auxiliary bitmask */
804   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
805   for (i=0;i<n_vertices;i++) {
806     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
807   }
808   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
809   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
810   for (i=0, n_R=0; i<pcis->n; i++) {
811     if (!PetscBTLookup(bitmask,i)) {
812       idx_R_local[n_R] = i;
813       n_R++;
814     }
815   }
816   ierr = PetscFree(vertices);CHKERRQ(ierr);
817   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
818 
819   /* print some info if requested */
820   if (pcbddc->dbg_flag) {
821     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
822     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
823     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
824     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
825     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
826     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
827     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
828   }
829 
830   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
831   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
832   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
833   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
834   for (i=0; i<n_D; i++) {
835     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
836   }
837   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
838   for (i=0, j=0; i<n_R; i++) {
839     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
840       aux_array1[j++] = i;
841     }
842   }
843   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
844   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   for (i=0, j=0; i<n_B; i++) {
846     if (!PetscBTLookup(bitmask,is_indices[i])) {
847       aux_array2[j++] = i;
848     }
849   }
850   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
851   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
852   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
853   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
854   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
855 
856   if (pcbddc->switch_static || pcbddc->dbg_flag) {
857     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
858     for (i=0, j=0; i<n_R; i++) {
859       if (PetscBTLookup(bitmask,idx_R_local[i])) {
860         aux_array1[j++] = i;
861       }
862     }
863     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
864     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
865     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
866   }
867   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
873 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
874 {
875   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
876 
877   PetscFunctionBegin;
878   pcbddc->use_exact_dirichlet=use;
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
884 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
885 {
886   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
887   PC_IS          *pcis = (PC_IS*)pc->data;
888   PC             pc_temp;
889   Mat            A_RR;
890   Vec            vec1,vec2,vec3;
891   MatStructure   matstruct;
892   PetscScalar    m_one = -1.0;
893   PetscReal      value;
894   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   /* Creating PC contexts for local Dirichlet and Neumann problems */
899   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
900 
901   /* DIRICHLET PROBLEM */
902   /* Matrix for Dirichlet problem is pcis->A_II */
903   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
904   if (!pcbddc->ksp_D) { /* create object if not yet build */
905     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
906     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
907     /* default */
908     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
909     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
910     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
911     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
912     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
913   }
914   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
915   /* Allow user's customization */
916   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
917   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
918   if (!n_D) {
919     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
920     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
921   }
922   /* Set Up KSP for Dirichlet problem of BDDC */
923   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
924   /* set ksp_D into pcis data */
925   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
926   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
927   pcis->ksp_D = pcbddc->ksp_D;
928 
929   /* NEUMANN PROBLEM */
930   /* Matrix for Neumann problem is A_RR -> we need to create it */
931   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
932   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
933   if (!pcbddc->ksp_R) { /* create object if not yet build */
934     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
935     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
936     /* default */
937     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
938     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
939     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
940     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
941     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
942   }
943   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
944   /* Allow user's customization */
945   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
946   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
947   if (!n_R) {
948     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
949     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
950   }
951   /* Set Up KSP for Neumann problem of BDDC */
952   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
953 
954   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
955 
956   /* Dirichlet */
957   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
958   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
959   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
960   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
961   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
962   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
963   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
964   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
965   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
966   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
967   /* need to be adapted? */
968   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
969   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
970   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
971   /* print info */
972   if (pcbddc->dbg_flag) {
973     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
974     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
975     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
976     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
977     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
978   }
979   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
980     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
981   }
982 
983   /* Neumann */
984   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
985   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
986   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
987   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
988   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
989   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
990   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
991   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
992   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
993   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
994   /* need to be adapted? */
995   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
996   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
997   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
998   /* print info */
999   if (pcbddc->dbg_flag) {
1000     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
1001     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1002   }
1003   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1004     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1005   }
1006 
1007   /* free Neumann problem's matrix */
1008   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1014 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1015 {
1016   PetscErrorCode ierr;
1017   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1018 
1019   PetscFunctionBegin;
1020   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1021   if (pcbddc->local_auxmat1) {
1022     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1023     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1024   }
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1030 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1031 {
1032   PetscErrorCode ierr;
1033   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1034   PC_IS*            pcis = (PC_IS*)  (pc->data);
1035   const PetscScalar zero = 0.0;
1036 
1037   PetscFunctionBegin;
1038   /* Application of PHI^T (or PSI^T)  */
1039   if (pcbddc->coarse_psi_B) {
1040     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1041     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1042   } else {
1043     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1044     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1045   }
1046   /* Scatter data of coarse_rhs */
1047   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1048   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049 
1050   /* Local solution on R nodes */
1051   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1052   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   if (pcbddc->switch_static) {
1055     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   }
1058   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1059   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1060   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   if (pcbddc->switch_static) {
1063     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1065   }
1066 
1067   /* Coarse solution */
1068   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1070     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1071   }
1072   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1074 
1075   /* Sum contributions from two levels */
1076   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1077   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1083 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1084 {
1085   PetscErrorCode ierr;
1086   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1087 
1088   PetscFunctionBegin;
1089   switch (pcbddc->coarse_communications_type) {
1090     case SCATTERS_BDDC:
1091       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1092       break;
1093     case GATHERS_BDDC:
1094       break;
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1101 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1102 {
1103   PetscErrorCode ierr;
1104   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1105   PetscScalar*   array_to;
1106   PetscScalar*   array_from;
1107   MPI_Comm       comm;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1112   switch (pcbddc->coarse_communications_type) {
1113     case SCATTERS_BDDC:
1114       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1115       break;
1116     case GATHERS_BDDC:
1117       if (vec_from) {
1118         ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr);
1119       }
1120       if (vec_to) {
1121         ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr);
1122       }
1123       switch(pcbddc->coarse_problem_type){
1124         case SEQUENTIAL_BDDC:
1125           if (smode == SCATTER_FORWARD) {
1126             ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1127             if (vec_to) {
1128               if (imode == ADD_VALUES) {
1129                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1130                   array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1131                 }
1132               } else {
1133                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1134                   array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1135                 }
1136               }
1137             }
1138           } else {
1139             if (vec_from) {
1140               if (imode == ADD_VALUES) {
1141                 MPI_Comm vec_from_comm;
1142                 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr);
1143                 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type);
1144               }
1145               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1146                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
1147               }
1148             }
1149             ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1150           }
1151           break;
1152         case REPLICATED_BDDC:
1153           if (smode == SCATTER_FORWARD) {
1154             ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
1155             if (imode == ADD_VALUES) {
1156               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1157                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1158               }
1159             } else {
1160               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1161                 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1162               }
1163             }
1164           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
1165             if (imode == ADD_VALUES) {
1166               for (i=0;i<pcbddc->local_primal_size;i++) {
1167                 array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
1168               }
1169             } else {
1170               for (i=0;i<pcbddc->local_primal_size;i++) {
1171                 array_to[i]=array_from[pcbddc->local_primal_indices[i]];
1172               }
1173             }
1174           }
1175           break;
1176         case MULTILEVEL_BDDC:
1177           break;
1178         case PARALLEL_BDDC:
1179           break;
1180       }
1181       if (vec_from) {
1182         ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr);
1183       }
1184       if (vec_to) {
1185         ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr);
1186       }
1187       break;
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 /* uncomment for testing purposes */
1193 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1194 #undef __FUNCT__
1195 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1196 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1197 {
1198   PetscErrorCode    ierr;
1199   PC_IS*            pcis = (PC_IS*)(pc->data);
1200   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1201   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1202   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1203   MatType           impMatType=MATSEQAIJ;
1204   /* one and zero */
1205   PetscScalar       one=1.0,zero=0.0;
1206   /* space to store constraints and their local indices */
1207   PetscScalar       *temp_quadrature_constraint;
1208   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1209   /* iterators */
1210   PetscInt          i,j,k,total_counts,temp_start_ptr;
1211   /* stuff to store connected components stored in pcbddc->mat_graph */
1212   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1213   PetscInt          n_ISForFaces,n_ISForEdges;
1214   /* near null space stuff */
1215   MatNullSpace      nearnullsp;
1216   const Vec         *nearnullvecs;
1217   Vec               *localnearnullsp;
1218   PetscBool         nnsp_has_cnst;
1219   PetscInt          nnsp_size;
1220   PetscScalar       *array;
1221   /* BLAS integers */
1222   PetscBLASInt      lwork,lierr;
1223   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1224   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1225   /* LAPACK working arrays for SVD or POD */
1226   PetscBool         skip_lapack;
1227   PetscScalar       *work;
1228   PetscReal         *singular_vals;
1229 #if defined(PETSC_USE_COMPLEX)
1230   PetscReal         *rwork;
1231 #endif
1232 #if defined(PETSC_MISSING_LAPACK_GESVD)
1233   PetscBLASInt      Blas_one_2=1;
1234   PetscScalar       *temp_basis,*correlation_mat;
1235 #else
1236   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1237   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1238 #endif
1239   /* change of basis */
1240   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1241   PetscBool         boolforchange;
1242   PetscBT           touched,change_basis;
1243   /* auxiliary stuff */
1244   PetscInt          *nnz,*is_indices,*local_to_B;
1245   /* some quantities */
1246   PetscInt          n_vertices,total_primal_vertices;
1247   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1248 
1249 
1250   PetscFunctionBegin;
1251   /* Get index sets for faces, edges and vertices from graph */
1252   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1253     pcbddc->use_vertices = PETSC_TRUE;
1254   }
1255   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1256   /* print some info */
1257   if (pcbddc->dbg_flag) {
1258     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1259     i = 0;
1260     if (ISForVertices) {
1261       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1265     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1266     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1267   }
1268   /* check if near null space is attached to global mat */
1269   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1270   if (nearnullsp) {
1271     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1272   } else { /* if near null space is not provided BDDC uses constants by default */
1273     nnsp_size = 0;
1274     nnsp_has_cnst = PETSC_TRUE;
1275   }
1276   /* get max number of constraints on a single cc */
1277   max_constraints = nnsp_size;
1278   if (nnsp_has_cnst) max_constraints++;
1279 
1280   /*
1281        Evaluate maximum storage size needed by the procedure
1282        - temp_indices will contain start index of each constraint stored as follows
1283        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1284        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1285        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1286                                                                                                                                                          */
1287   total_counts = n_ISForFaces+n_ISForEdges;
1288   total_counts *= max_constraints;
1289   n_vertices = 0;
1290   if (ISForVertices) {
1291     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1292   }
1293   total_counts += n_vertices;
1294   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1295   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1296   total_counts = 0;
1297   max_size_of_constraint = 0;
1298   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1299     if (i<n_ISForEdges) {
1300       used_IS = &ISForEdges[i];
1301     } else {
1302       used_IS = &ISForFaces[i-n_ISForEdges];
1303     }
1304     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1305     total_counts += j;
1306     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1307   }
1308   total_counts *= max_constraints;
1309   total_counts += n_vertices;
1310   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1311   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1312   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1313   /* local to boundary numbering */
1314   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1315   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1316   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1317   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1318   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1319   /* get local part of global near null space vectors */
1320   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1321   for (k=0;k<nnsp_size;k++) {
1322     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1323     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   }
1326 
1327   /* whether or not to skip lapack calls */
1328   skip_lapack = PETSC_TRUE;
1329   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1330 
1331   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1332   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1333     PetscScalar temp_work;
1334 #if defined(PETSC_MISSING_LAPACK_GESVD)
1335     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1336     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1337     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1338     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1339 #if defined(PETSC_USE_COMPLEX)
1340     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1341 #endif
1342     /* now we evaluate the optimal workspace using query with lwork=-1 */
1343     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1344     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1345     lwork = -1;
1346     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1347 #if !defined(PETSC_USE_COMPLEX)
1348     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1349 #else
1350     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1351 #endif
1352     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1353     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1354 #else /* on missing GESVD */
1355     /* SVD */
1356     PetscInt max_n,min_n;
1357     max_n = max_size_of_constraint;
1358     min_n = max_constraints;
1359     if (max_size_of_constraint < max_constraints) {
1360       min_n = max_size_of_constraint;
1361       max_n = max_constraints;
1362     }
1363     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1364 #if defined(PETSC_USE_COMPLEX)
1365     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1366 #endif
1367     /* now we evaluate the optimal workspace using query with lwork=-1 */
1368     lwork = -1;
1369     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1370     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1371     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1372     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1373 #if !defined(PETSC_USE_COMPLEX)
1374     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1375 #else
1376     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1377 #endif
1378     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1379     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1380 #endif /* on missing GESVD */
1381     /* Allocate optimal workspace */
1382     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1383     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1384   }
1385   /* Now we can loop on constraining sets */
1386   total_counts = 0;
1387   temp_indices[0] = 0;
1388   /* vertices */
1389   if (ISForVertices) {
1390     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1391     if (nnsp_has_cnst) { /* consider all vertices */
1392       for (i=0;i<n_vertices;i++) {
1393         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1394         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1395         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1396         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1397         total_counts++;
1398       }
1399     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1400       PetscBool used_vertex;
1401       for (i=0;i<n_vertices;i++) {
1402         used_vertex = PETSC_FALSE;
1403         k = 0;
1404         while (!used_vertex && k<nnsp_size) {
1405           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1406           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1407             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1408             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1409             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1410             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1411             total_counts++;
1412             used_vertex = PETSC_TRUE;
1413           }
1414           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1415           k++;
1416         }
1417       }
1418     }
1419     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1420     n_vertices = total_counts;
1421   }
1422 
1423   /* edges and faces */
1424   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1425     if (i<n_ISForEdges) {
1426       used_IS = &ISForEdges[i];
1427       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1428     } else {
1429       used_IS = &ISForFaces[i-n_ISForEdges];
1430       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1431     }
1432     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1433     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1434     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1435     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1436     /* change of basis should not be performed on local periodic nodes */
1437     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1438     if (nnsp_has_cnst) {
1439       PetscScalar quad_value;
1440       temp_constraints++;
1441       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1442       for (j=0;j<size_of_constraint;j++) {
1443         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1444         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1445         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1446       }
1447       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1448       if (boolforchange) {
1449         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1450       }
1451       total_counts++;
1452     }
1453     for (k=0;k<nnsp_size;k++) {
1454       PetscReal real_value;
1455       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1456       for (j=0;j<size_of_constraint;j++) {
1457         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1458         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1459         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1460       }
1461       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1462       /* check if array is null on the connected component */
1463       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1464       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1465       if (real_value > 0.0) { /* keep indices and values */
1466         temp_constraints++;
1467         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1468         if (boolforchange) {
1469           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1470         }
1471         total_counts++;
1472       }
1473     }
1474     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1475     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1476     if (!pcbddc->use_nnsp_true) {
1477       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1478 
1479 #if defined(PETSC_MISSING_LAPACK_GESVD)
1480       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1481          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1482          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1483             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1484             from that computed using LAPACKgesvd
1485          -> This is due to a different computation of eigenvectors in LAPACKheev
1486          -> The quality of the POD-computed basis will be the same */
1487       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1488       /* Store upper triangular part of correlation matrix */
1489       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1490       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1491       for (j=0;j<temp_constraints;j++) {
1492         for (k=0;k<j+1;k++) {
1493           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1494         }
1495       }
1496       /* compute eigenvalues and eigenvectors of correlation matrix */
1497       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1498       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1499 #if !defined(PETSC_USE_COMPLEX)
1500       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1501 #else
1502       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1503 #endif
1504       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1505       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1506       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1507       j=0;
1508       while (j < temp_constraints && singular_vals[j] < tol) j++;
1509       total_counts=total_counts-j;
1510       /* scale and copy POD basis into used quadrature memory */
1511       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1512       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1513       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1514       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1515       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1516       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1517       if (j<temp_constraints) {
1518         PetscInt ii;
1519         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1520         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1521         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1522         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1523         for (k=0;k<temp_constraints-j;k++) {
1524           for (ii=0;ii<size_of_constraint;ii++) {
1525             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1526           }
1527         }
1528       }
1529 #else  /* on missing GESVD */
1530       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1532       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1533       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1534 #if !defined(PETSC_USE_COMPLEX)
1535       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1536 #else
1537       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1538 #endif
1539       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1540       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1541       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1542       k = temp_constraints;
1543       if (k > size_of_constraint) k = size_of_constraint;
1544       j = 0;
1545       while (j < k && singular_vals[k-j-1] < tol) j++;
1546       total_counts = total_counts-temp_constraints+k-j;
1547 #endif /* on missing GESVD */
1548     }
1549   }
1550   /* free index sets of faces, edges and vertices */
1551   for (i=0;i<n_ISForFaces;i++) {
1552     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1553   }
1554   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1555   for (i=0;i<n_ISForEdges;i++) {
1556     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1559   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1560 
1561   /* free workspace */
1562   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1563     ierr = PetscFree(work);CHKERRQ(ierr);
1564 #if defined(PETSC_USE_COMPLEX)
1565     ierr = PetscFree(rwork);CHKERRQ(ierr);
1566 #endif
1567     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1568 #if defined(PETSC_MISSING_LAPACK_GESVD)
1569     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1570     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1571 #endif
1572   }
1573   for (k=0;k<nnsp_size;k++) {
1574     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1575   }
1576   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1577 
1578   /* set quantities in pcbddc data structure */
1579   /* n_vertices defines the number of subdomain corners in the primal space */
1580   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1581   pcbddc->local_primal_size = total_counts;
1582   pcbddc->n_vertices = n_vertices;
1583   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1584 
1585   /* Create constraint matrix */
1586   /* The constraint matrix is used to compute the l2g map of primal dofs */
1587   /* so we need to set it up properly either with or without change of basis */
1588   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1589   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1590   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1591   /* array to compute a local numbering of constraints : vertices first then constraints */
1592   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1593   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1594   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1595   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1596   /* auxiliary stuff for basis change */
1597   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1598   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1599 
1600   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1601   total_primal_vertices=0;
1602   for (i=0;i<pcbddc->local_primal_size;i++) {
1603     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1604     if (size_of_constraint == 1) {
1605       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1606       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1607       aux_primal_minloc[total_primal_vertices]=0;
1608       total_primal_vertices++;
1609     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1610       PetscInt min_loc,min_index;
1611       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1612       /* find first untouched local node */
1613       k = 0;
1614       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1615       min_index = global_indices[k];
1616       min_loc = k;
1617       /* search the minimum among global nodes already untouched on the cc */
1618       for (k=1;k<size_of_constraint;k++) {
1619         /* there can be more than one constraint on a single connected component */
1620         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1621           min_index = global_indices[k];
1622           min_loc = k;
1623         }
1624       }
1625       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1626       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1627       aux_primal_minloc[total_primal_vertices]=min_loc;
1628       total_primal_vertices++;
1629     }
1630   }
1631   /* free workspace */
1632   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1633   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1634   /* permute indices in order to have a sorted set of vertices */
1635   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1636 
1637   /* nonzero structure of constraint matrix */
1638   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1639   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1640   j=total_primal_vertices;
1641   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1642     if (!PetscBTLookup(change_basis,i)) {
1643       nnz[j]=temp_indices[i+1]-temp_indices[i];
1644       j++;
1645     }
1646   }
1647   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1648   ierr = PetscFree(nnz);CHKERRQ(ierr);
1649   /* set values in constraint matrix */
1650   for (i=0;i<total_primal_vertices;i++) {
1651     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1652   }
1653   total_counts = total_primal_vertices;
1654   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1655     if (!PetscBTLookup(change_basis,i)) {
1656       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1657       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1658       total_counts++;
1659     }
1660   }
1661   /* assembling */
1662   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1664   /*
1665   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1666   */
1667   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1668   if (pcbddc->use_change_of_basis) {
1669     PetscBool qr_needed = PETSC_FALSE;
1670     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1671     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1672     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1673     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1674     /* work arrays */
1675     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1676     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1677     /* nonzeros per row */
1678     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1679       if (PetscBTLookup(change_basis,i)) {
1680         qr_needed = PETSC_TRUE;
1681         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1682         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1683       }
1684     }
1685     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1686     ierr = PetscFree(nnz);CHKERRQ(ierr);
1687     /* Set initial identity in the matrix */
1688     for (i=0;i<pcis->n_B;i++) {
1689       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1690     }
1691 
1692     /* Now we loop on the constraints which need a change of basis */
1693     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1694     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1695     /* Change of basis matrix T computed via QR decomposition of constraints */
1696     if (qr_needed) {
1697       /* dual and primal dofs on a single cc */
1698       PetscInt     dual_dofs,primal_dofs;
1699       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1700       PetscInt     primal_counter;
1701       /* working stuff for GEQRF */
1702       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1703       PetscBLASInt lqr_work;
1704       /* working stuff for UNGQR */
1705       PetscScalar  *gqr_work,lgqr_work_t;
1706       PetscBLASInt lgqr_work;
1707       /* working stuff for TRTRS */
1708       PetscScalar  *trs_rhs;
1709       PetscBLASInt Blas_NRHS;
1710       /* pointers for values insertion into change of basis matrix */
1711       PetscInt     *start_rows,*start_cols;
1712       PetscScalar  *start_vals;
1713       /* working stuff for values insertion */
1714       PetscBT      is_primal;
1715 
1716       /* space to store Q */
1717       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1718       /* first we issue queries for optimal work */
1719       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1720       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1721       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1722       lqr_work = -1;
1723       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1724       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1725       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1726       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1727       lgqr_work = -1;
1728       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1729       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1730       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1731       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1732       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1733       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1734       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1735       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1736       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1737       /* array to store scaling factors for reflectors */
1738       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1739       /* array to store rhs and solution of triangular solver */
1740       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1741       /* array to store whether a node is primal or not */
1742       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1743       for (i=0;i<total_primal_vertices;i++) {
1744         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1745       }
1746 
1747       /* allocating workspace for check */
1748       if (pcbddc->dbg_flag) {
1749         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1750         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1751         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1752       }
1753 
1754       /* loop on constraints and see whether or not they need a change of basis */
1755       /* -> using implicit ordering contained in temp_indices data */
1756       total_counts = pcbddc->n_vertices;
1757       primal_counter = total_counts;
1758       while (total_counts<pcbddc->local_primal_size) {
1759         primal_dofs = 1;
1760         if (PetscBTLookup(change_basis,total_counts)) {
1761           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1762           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1763             primal_dofs++;
1764           }
1765           /* get constraint info */
1766           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1767           dual_dofs = size_of_constraint-primal_dofs;
1768 
1769           /* copy quadrature constraints for change of basis check */
1770           if (pcbddc->dbg_flag) {
1771             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1772             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1773           }
1774 
1775           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1776           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1777 
1778           /* compute QR decomposition of constraints */
1779           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1780           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1781           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1782           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1783           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1784           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1785           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1786 
1787           /* explictly compute R^-T */
1788           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1789           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1790           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1791           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1792           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1793           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1794           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1795           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1796           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1797           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1798 
1799           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1800           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1801           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1802           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1803           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1804           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1805           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1806           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1807           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1808 
1809           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1810              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1811              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1812           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1813           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1814           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1815           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1816           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1817           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1818           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1819           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1820           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1821           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1822 
1823           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1824           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1825           /* insert cols for primal dofs */
1826           for (j=0;j<primal_dofs;j++) {
1827             start_vals = &qr_basis[j*size_of_constraint];
1828             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1829             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1830           }
1831           /* insert cols for dual dofs */
1832           for (j=0,k=0;j<dual_dofs;k++) {
1833             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1834               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1835               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1836               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1837               j++;
1838             }
1839           }
1840 
1841           /* check change of basis */
1842           if (pcbddc->dbg_flag) {
1843             PetscInt   ii,jj;
1844             PetscBool valid_qr=PETSC_TRUE;
1845             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1846             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1847             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1848             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1849             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1850             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1851             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1852             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1853             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1854             for (jj=0;jj<size_of_constraint;jj++) {
1855               for (ii=0;ii<primal_dofs;ii++) {
1856                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1857                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1858               }
1859             }
1860             if (!valid_qr) {
1861               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1862               for (jj=0;jj<size_of_constraint;jj++) {
1863                 for (ii=0;ii<primal_dofs;ii++) {
1864                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1865                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1866                   }
1867                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1868                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1869                   }
1870                 }
1871               }
1872             } else {
1873               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1874             }
1875           }
1876           /* increment primal counter */
1877           primal_counter += primal_dofs;
1878         } else {
1879           if (pcbddc->dbg_flag) {
1880             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1881           }
1882         }
1883         /* increment constraint counter total_counts */
1884         total_counts += primal_dofs;
1885       }
1886       if (pcbddc->dbg_flag) {
1887         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1888         ierr = PetscFree(work);CHKERRQ(ierr);
1889       }
1890       /* free workspace */
1891       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1892       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1893       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1894       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1895       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1896       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1897     }
1898     /* assembling */
1899     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1900     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901     /*
1902     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1903     */
1904   }
1905   /* free workspace */
1906   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1907   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1908   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1909   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1910   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1911   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1912   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1913   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1919 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1920 {
1921   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1922   PC_IS       *pcis = (PC_IS*)pc->data;
1923   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1924   PetscInt    bs,ierr,i,vertex_size;
1925   PetscViewer viewer=pcbddc->dbg_viewer;
1926 
1927   PetscFunctionBegin;
1928   /* Init local Graph struct */
1929   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1930 
1931   /* Check validity of the csr graph passed in by the user */
1932   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1933     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1934   }
1935   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1936   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1937     Mat mat_adj;
1938     const PetscInt *xadj,*adjncy;
1939     PetscBool flg_row=PETSC_TRUE;
1940 
1941     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1942     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1943     if (!flg_row) {
1944       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1945     }
1946     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1947     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1948     if (!flg_row) {
1949       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1950     }
1951     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1952   }
1953 
1954   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1955   vertex_size = 1;
1956   if (!pcbddc->n_ISForDofs) {
1957     IS *custom_ISForDofs;
1958 
1959     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1960     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1961     for (i=0;i<bs;i++) {
1962       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1963     }
1964     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1965     /* remove my references to IS objects */
1966     for (i=0;i<bs;i++) {
1967       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1968     }
1969     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1970   } else { /* mat block size as vertex size (used for elasticity) */
1971     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1972   }
1973 
1974   /* Setup of Graph */
1975   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1976 
1977   /* Graph's connected components analysis */
1978   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1979 
1980   /* print some info to stdout */
1981   if (pcbddc->dbg_flag) {
1982     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1983   }
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1989 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[])
1990 {
1991   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1992   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   n = 0;
1997   vertices = 0;
1998   if (pcbddc->ConstraintMatrix) {
1999     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2000     for (i=0;i<local_primal_size;i++) {
2001       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2002       if (size_of_constraint == 1) n++;
2003       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2004     }
2005     if (vertices_idx) {
2006       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2007       n = 0;
2008       for (i=0;i<local_primal_size;i++) {
2009         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2010         if (size_of_constraint == 1) {
2011           vertices[n++]=row_cmat_indices[0];
2012         }
2013         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2014       }
2015     }
2016   }
2017   *n_vertices = n;
2018   if (vertices_idx) *vertices_idx = vertices;
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2024 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[])
2025 {
2026   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2027   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2028   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2029   PetscBT        touched;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   n = 0;
2034   constraints_index = 0;
2035   if (pcbddc->ConstraintMatrix) {
2036     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2037     max_size_of_constraint = 0;
2038     for (i=0;i<local_primal_size;i++) {
2039       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2040       if (size_of_constraint > 1) {
2041         n++;
2042       }
2043       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2044       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2045     }
2046     if (constraints_idx) {
2047       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2048       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2049       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2050       n = 0;
2051       for (i=0;i<local_primal_size;i++) {
2052         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2053         if (size_of_constraint > 1) {
2054           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2055           /* find first untouched local node */
2056           j = 0;
2057           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2058           min_index = row_cmat_global_indices[j];
2059           min_loc = j;
2060           /* search the minimum among nodes not yet touched on the connected component
2061              since there can be more than one constraint on a single cc */
2062           for (j=1;j<size_of_constraint;j++) {
2063             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2064               min_index = row_cmat_global_indices[j];
2065               min_loc = j;
2066             }
2067           }
2068           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2069           constraints_index[n++] = row_cmat_indices[min_loc];
2070         }
2071         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2072       }
2073       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2074       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2075     }
2076   }
2077   *n_constraints = n;
2078   if (constraints_idx) *constraints_idx = constraints_index;
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 /* the next two functions has been adapted from pcis.c */
2083 #undef __FUNCT__
2084 #define __FUNCT__ "PCBDDCApplySchur"
2085 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2086 {
2087   PetscErrorCode ierr;
2088   PC_IS          *pcis = (PC_IS*)(pc->data);
2089 
2090   PetscFunctionBegin;
2091   if (!vec2_B) { vec2_B = v; }
2092   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2093   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2094   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2095   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2096   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2102 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2103 {
2104   PetscErrorCode ierr;
2105   PC_IS          *pcis = (PC_IS*)(pc->data);
2106 
2107   PetscFunctionBegin;
2108   if (!vec2_B) { vec2_B = v; }
2109   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2110   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2111   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2112   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2113   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "PCBDDCSubsetNumbering"
2119 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2120 {
2121   Vec            local_vec,global_vec;
2122   IS             seqis,paris;
2123   VecScatter     scatter_ctx;
2124   PetscScalar    *array;
2125   PetscInt       *temp_global_dofs;
2126   PetscScalar    globalsum;
2127   PetscInt       i,j,s;
2128   PetscInt       nlocals,first_index,old_index,max_local;
2129   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2130   PetscMPIInt    *dof_sizes,*dof_displs;
2131   PetscBool      first_found;
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   /* mpi buffers */
2136   MPI_Comm_size(comm,&size_prec_comm);
2137   MPI_Comm_rank(comm,&rank_prec_comm);
2138   j = ( !rank_prec_comm ? size_prec_comm : 0);
2139   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2140   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2141   /* get maximum size of subset */
2142   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2143   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2144   max_local = 0;
2145   if (n_local_dofs) {
2146     max_local = temp_global_dofs[0];
2147     for (i=1;i<n_local_dofs;i++) {
2148       if (max_local < temp_global_dofs[i] ) {
2149         max_local = temp_global_dofs[i];
2150       }
2151     }
2152   }
2153   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2154   max_global++;
2155   max_local = 0;
2156   if (n_local_dofs) {
2157     max_local = local_dofs[0];
2158     for (i=1;i<n_local_dofs;i++) {
2159       if (max_local < local_dofs[i] ) {
2160         max_local = local_dofs[i];
2161       }
2162     }
2163   }
2164   max_local++;
2165   /* allocate workspace */
2166   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2167   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2168   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2169   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2170   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2171   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2172   /* create scatter */
2173   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2174   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2175   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2176   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2177   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2178   /* init array */
2179   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2180   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2181   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2182   if (local_dofs_mult) {
2183     for (i=0;i<n_local_dofs;i++) {
2184       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2185     }
2186   } else {
2187     for (i=0;i<n_local_dofs;i++) {
2188       array[local_dofs[i]]=1.0;
2189     }
2190   }
2191   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2192   /* scatter into global vec and get total number of global dofs */
2193   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2194   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2196   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2197   /* Fill global_vec with cumulative function for global numbering */
2198   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2199   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2200   nlocals = 0;
2201   first_index = -1;
2202   first_found = PETSC_FALSE;
2203   for (i=0;i<s;i++) {
2204     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2205       first_found = PETSC_TRUE;
2206       first_index = i;
2207     }
2208     nlocals += (PetscInt)PetscRealPart(array[i]);
2209   }
2210   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2211   if (!rank_prec_comm) {
2212     dof_displs[0]=0;
2213     for (i=1;i<size_prec_comm;i++) {
2214       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2215     }
2216   }
2217   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2218   if (first_found) {
2219     array[first_index] += (PetscScalar)nlocals;
2220     old_index = first_index;
2221     for (i=first_index+1;i<s;i++) {
2222       if (PetscRealPart(array[i]) > 0.0) {
2223         array[i] += array[old_index];
2224         old_index = i;
2225       }
2226     }
2227   }
2228   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2229   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2230   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2231   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2232   /* get global ordering of local dofs */
2233   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2234   if (local_dofs_mult) {
2235     for (i=0;i<n_local_dofs;i++) {
2236       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2237     }
2238   } else {
2239     for (i=0;i<n_local_dofs;i++) {
2240       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2241     }
2242   }
2243   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2244   /* free workspace */
2245   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2246   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2247   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2248   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2249   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2250   /* return pointer to global ordering of local dofs */
2251   *global_numbering_subset = temp_global_dofs;
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2257 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2258 {
2259   PetscInt       i,j;
2260   PetscScalar    *alphas;
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   /* this implements stabilized Gram-Schmidt */
2265   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2266   for (i=0;i<n;i++) {
2267     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2268     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2269     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2270   }
2271   ierr = PetscFree(alphas);CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 /* BDDC requires metis 5.0.1 for multilevel */
2276 #if defined(PETSC_HAVE_METIS)
2277 #include "metis.h"
2278 #define MetisInt    idx_t
2279 #define MetisScalar real_t
2280 #endif
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2284 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2285 {
2286   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
2287   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2288   PC_IS     *pcis     = (PC_IS*)pc->data;
2289   MPI_Comm  prec_comm;
2290   MPI_Comm  coarse_comm;
2291 
2292   PetscInt  coarse_size;
2293 
2294   MatNullSpace CoarseNullSpace;
2295 
2296   /* common to all choiches */
2297   PetscScalar *temp_coarse_mat_vals;
2298   PetscScalar *ins_coarse_mat_vals;
2299   PetscInt    *ins_local_primal_indices;
2300   PetscMPIInt *localsizes2,*localdispl2;
2301   PetscMPIInt size_prec_comm;
2302   PetscMPIInt rank_prec_comm;
2303   PetscMPIInt active_rank=MPI_PROC_NULL;
2304   PetscMPIInt master_proc=0;
2305   PetscInt    ins_local_primal_size;
2306   /* specific to MULTILEVEL_BDDC */
2307   PetscMPIInt *ranks_recv=0;
2308   PetscMPIInt count_recv=0;
2309   PetscMPIInt rank_coarse_proc_send_to=-1;
2310   PetscMPIInt coarse_color = MPI_UNDEFINED;
2311   ISLocalToGlobalMapping coarse_ISLG;
2312   /* some other variables */
2313   PetscErrorCode ierr;
2314   MatType coarse_mat_type;
2315   PCType  coarse_pc_type;
2316   KSPType coarse_ksp_type;
2317   PC pc_temp;
2318   PetscInt i,j,k;
2319   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2320   /* verbose output viewer */
2321   PetscViewer viewer=pcbddc->dbg_viewer;
2322   PetscInt    dbg_flag=pcbddc->dbg_flag;
2323 
2324   PetscInt      offset,offset2;
2325   PetscMPIInt   im_active,active_procs;
2326   PetscInt      *dnz,*onz;
2327 
2328   PetscBool     setsym,issym=PETSC_FALSE;
2329 
2330   PetscFunctionBegin;
2331   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2332   ins_local_primal_indices = 0;
2333   ins_coarse_mat_vals      = 0;
2334   localsizes2              = 0;
2335   localdispl2              = 0;
2336   temp_coarse_mat_vals     = 0;
2337   coarse_ISLG              = 0;
2338 
2339   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2340   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2341   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2342 
2343   /* Assign global numbering to coarse dofs */
2344   {
2345     PetscInt     *auxlocal_primal,*aux_idx;
2346     PetscMPIInt  mpi_local_primal_size;
2347     PetscScalar  coarsesum,*array;
2348 
2349     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
2350 
2351     /* Construct needed data structures for message passing */
2352     j = 0;
2353     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2354       j = size_prec_comm;
2355     }
2356     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
2357     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2358     /* Gather local_primal_size information for all processes  */
2359     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2360       ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2361     } else {
2362       ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2363     }
2364     pcbddc->replicated_primal_size = 0;
2365     for (i=0; i<j; i++) {
2366       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
2367       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
2368     }
2369 
2370     /* This code fragment assumes that the number of local constraints per connected component
2371        is not greater than the number of nodes defined for the connected component
2372        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2373     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2374     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2375     ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2376     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2377     ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2378     ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2379     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2380     /* Compute number of coarse dofs */
2381     ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr);
2382 
2383     if (dbg_flag) {
2384       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2385       ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2386       ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr);
2387       ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2388       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2389       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
2390       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2391       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2392       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2393       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2394       ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2395       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2396       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2397       for (i=0;i<pcis->n;i++) {
2398         if (array[i] == 1.0) {
2399           ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr);
2400           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr);
2401         }
2402       }
2403       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2404       for (i=0;i<pcis->n;i++) {
2405         if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2406       }
2407       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2408       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2409       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2410       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2411       ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2412       ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr);
2413       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2414     }
2415     ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
2416   }
2417 
2418   if (dbg_flag) {
2419     ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr);
2420     if (dbg_flag > 1) {
2421       ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2422       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2423       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2424       for (i=0;i<pcbddc->local_primal_size;i++) {
2425         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
2426       }
2427       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2428     }
2429   }
2430 
2431   im_active = 0;
2432   if (pcis->n) im_active = 1;
2433   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2434 
2435   /* adapt coarse problem type */
2436 #if defined(PETSC_HAVE_METIS)
2437   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2438     if (pcbddc->current_level < pcbddc->max_levels) {
2439       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2440         if (dbg_flag) {
2441           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2442          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2443         }
2444         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2445       }
2446     } else {
2447       if (dbg_flag) {
2448         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2449         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2450       }
2451       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2452     }
2453   }
2454 #else
2455   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2456 #endif
2457 
2458   switch(pcbddc->coarse_problem_type){
2459 
2460     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2461     {
2462 #if defined(PETSC_HAVE_METIS)
2463       /* we need additional variables */
2464       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2465       MetisInt    *metis_coarse_subdivision;
2466       MetisInt    options[METIS_NOPTIONS];
2467       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2468       PetscMPIInt procs_jumps_coarse_comm;
2469       PetscMPIInt *coarse_subdivision;
2470       PetscMPIInt *total_count_recv;
2471       PetscMPIInt *total_ranks_recv;
2472       PetscMPIInt *displacements_recv;
2473       PetscMPIInt *my_faces_connectivity;
2474       PetscMPIInt *petsc_faces_adjncy;
2475       MetisInt    *faces_adjncy;
2476       MetisInt    *faces_xadj;
2477       PetscMPIInt *number_of_faces;
2478       PetscMPIInt *faces_displacements;
2479       PetscInt    *array_int;
2480       PetscMPIInt my_faces=0;
2481       PetscMPIInt total_faces=0;
2482       PetscInt    ranks_stretching_ratio;
2483 
2484       /* define some quantities */
2485       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2486       coarse_mat_type = MATIS;
2487       coarse_pc_type  = PCBDDC;
2488       coarse_ksp_type = KSPRICHARDSON;
2489 
2490       /* details of coarse decomposition */
2491       n_subdomains = active_procs;
2492       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2493       ranks_stretching_ratio = size_prec_comm/active_procs;
2494       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2495 
2496 #if 0
2497       PetscMPIInt *old_ranks;
2498       PetscInt    *new_ranks,*jj,*ii;
2499       MatPartitioning mat_part;
2500       IS coarse_new_decomposition,is_numbering;
2501       PetscViewer viewer_test;
2502       MPI_Comm    test_coarse_comm;
2503       PetscMPIInt test_coarse_color;
2504       Mat         mat_adj;
2505       /* Create new communicator for coarse problem splitting the old one */
2506       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2507          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2508       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2509       test_coarse_comm = MPI_COMM_NULL;
2510       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2511       if (im_active) {
2512         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2513         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2514         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2515         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2516         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2517         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2518         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2519         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2520         k = pcis->n_neigh-1;
2521         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2522         ii[0]=0;
2523         ii[1]=k;
2524         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2525         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2526         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2527         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2528         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2529         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2530         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2531         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2532         printf("Setting Nparts %d\n",n_parts);
2533         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2534         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2535         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2536         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2537         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2538         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2539         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2540         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2541         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2542         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2543         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2544         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2545         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2546       }
2547 #endif
2548 
2549       /* build CSR graph of subdomains' connectivity */
2550       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2551       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2552       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2553         for (j=0;j<pcis->n_shared[i];j++){
2554           array_int[ pcis->shared[i][j] ]+=1;
2555         }
2556       }
2557       for (i=1;i<pcis->n_neigh;i++){
2558         for (j=0;j<pcis->n_shared[i];j++){
2559           if (array_int[ pcis->shared[i][j] ] > 0 ){
2560             my_faces++;
2561             break;
2562           }
2563         }
2564       }
2565 
2566       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2567       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2568       my_faces=0;
2569       for (i=1;i<pcis->n_neigh;i++){
2570         for (j=0;j<pcis->n_shared[i];j++){
2571           if (array_int[ pcis->shared[i][j] ] > 0 ){
2572             my_faces_connectivity[my_faces]=pcis->neigh[i];
2573             my_faces++;
2574             break;
2575           }
2576         }
2577       }
2578       if (rank_prec_comm == master_proc) {
2579         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2580         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2581         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2582         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2583         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2584       }
2585       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2586       if (rank_prec_comm == master_proc) {
2587         faces_xadj[0]=0;
2588         faces_displacements[0]=0;
2589         j=0;
2590         for (i=1;i<size_prec_comm+1;i++) {
2591           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2592           if (number_of_faces[i-1]) {
2593             j++;
2594             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2595           }
2596         }
2597       }
2598       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2599       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2600       ierr = PetscFree(array_int);CHKERRQ(ierr);
2601       if (rank_prec_comm == master_proc) {
2602         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2603         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2604         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2605         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2606       }
2607 
2608       if ( rank_prec_comm == master_proc ) {
2609 
2610         PetscInt heuristic_for_metis=3;
2611 
2612         ncon=1;
2613         faces_nvtxs=n_subdomains;
2614         /* partition graoh induced by face connectivity */
2615         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2616         ierr = METIS_SetDefaultOptions(options);
2617         /* we need a contiguous partition of the coarse mesh */
2618         options[METIS_OPTION_CONTIG]=1;
2619         options[METIS_OPTION_NITER]=30;
2620         if (pcbddc->coarsening_ratio > 1) {
2621           if (n_subdomains>n_parts*heuristic_for_metis) {
2622             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2623             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2624             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2625             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2626           } else {
2627             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2628             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2629           }
2630         } else {
2631           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2632         }
2633         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2634         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2635         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2636 
2637         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2638         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2639         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2640         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2641       }
2642 
2643       /* Create new communicator for coarse problem splitting the old one */
2644       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2645         coarse_color=0;              /* for communicator splitting */
2646         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2647       }
2648       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2649          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2650       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2651 
2652       if ( coarse_color == 0 ) {
2653         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2654         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2655       } else {
2656         rank_coarse_comm = MPI_PROC_NULL;
2657       }
2658 
2659       /* master proc take care of arranging and distributing coarse information */
2660       if (rank_coarse_comm == master_proc) {
2661         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2662         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2663         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2664         /* some initializations */
2665         displacements_recv[0]=0;
2666         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2667         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2668         for (j=0;j<size_coarse_comm;j++) {
2669           for (i=0;i<size_prec_comm;i++) {
2670           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2671           }
2672         }
2673         /* displacements needed for scatterv of total_ranks_recv */
2674       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2675 
2676         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2677         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2678         for (j=0;j<size_coarse_comm;j++) {
2679           for (i=0;i<size_prec_comm;i++) {
2680             if (coarse_subdivision[i]==j) {
2681               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2682               total_count_recv[j]+=1;
2683             }
2684           }
2685         }
2686         /*for (j=0;j<size_coarse_comm;j++) {
2687           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2688           for (i=0;i<total_count_recv[j];i++) {
2689             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2690           }
2691           printf("\n");
2692         }*/
2693 
2694         /* identify new decomposition in terms of ranks in the old communicator */
2695         for (i=0;i<n_subdomains;i++) {
2696           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2697         }
2698         /*printf("coarse_subdivision in old end new ranks\n");
2699         for (i=0;i<size_prec_comm;i++)
2700           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2701             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2702           } else {
2703             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2704           }
2705         printf("\n");*/
2706       }
2707 
2708       /* Scatter new decomposition for send details */
2709       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2710       /* Scatter receiving details to members of coarse decomposition */
2711       if ( coarse_color == 0) {
2712         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2713         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2714         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2715       }
2716 
2717       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2718       if (coarse_color == 0) {
2719         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2720         for (i=0;i<count_recv;i++)
2721           printf("%d ",ranks_recv[i]);
2722         printf("\n");
2723       }*/
2724 
2725       if (rank_prec_comm == master_proc) {
2726         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2727         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2728         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2729         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2730       }
2731 #endif
2732       break;
2733     }
2734 
2735     case(REPLICATED_BDDC):
2736 
2737       pcbddc->coarse_communications_type = GATHERS_BDDC;
2738       coarse_mat_type = MATSEQAIJ;
2739       coarse_pc_type  = PCLU;
2740       coarse_ksp_type  = KSPPREONLY;
2741       coarse_comm = PETSC_COMM_SELF;
2742       active_rank = rank_prec_comm;
2743       break;
2744 
2745     case(PARALLEL_BDDC):
2746 
2747       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2748       coarse_mat_type = MATAIJ;
2749       coarse_pc_type  = PCREDUNDANT;
2750       coarse_ksp_type  = KSPPREONLY;
2751       coarse_comm = prec_comm;
2752       active_rank = rank_prec_comm;
2753       break;
2754 
2755     case(SEQUENTIAL_BDDC):
2756       pcbddc->coarse_communications_type = GATHERS_BDDC;
2757       coarse_mat_type = MATAIJ;
2758       coarse_pc_type = PCLU;
2759       coarse_ksp_type  = KSPPREONLY;
2760       coarse_comm = PETSC_COMM_SELF;
2761       active_rank = master_proc;
2762       break;
2763   }
2764 
2765   switch(pcbddc->coarse_communications_type){
2766 
2767     case(SCATTERS_BDDC):
2768       {
2769         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2770 
2771           IS coarse_IS;
2772 
2773           if(pcbddc->coarsening_ratio == 1) {
2774             ins_local_primal_size = pcbddc->local_primal_size;
2775             ins_local_primal_indices = pcbddc->local_primal_indices;
2776             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2777             /* nonzeros */
2778             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2779             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2780             for (i=0;i<ins_local_primal_size;i++) {
2781               dnz[i] = ins_local_primal_size;
2782             }
2783           } else {
2784             PetscMPIInt send_size;
2785             PetscMPIInt *send_buffer;
2786             PetscInt    *aux_ins_indices;
2787             PetscInt    ii,jj;
2788             MPI_Request *requests;
2789 
2790             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2791             /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
2792             ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
2793             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2794             pcbddc->replicated_primal_size = count_recv;
2795             j = 0;
2796             for (i=0;i<count_recv;i++) {
2797               pcbddc->local_primal_displacements[i] = j;
2798               j += pcbddc->local_primal_sizes[ranks_recv[i]];
2799             }
2800             pcbddc->local_primal_displacements[count_recv] = j;
2801             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2802             /* allocate auxiliary space */
2803             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2804             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2805             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2806             /* allocate stuffs for message massing */
2807             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2808             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2809             /* send indices to be inserted */
2810             for (i=0;i<count_recv;i++) {
2811               send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
2812               ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2813             }
2814             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2815               send_size = pcbddc->local_primal_size;
2816               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2817               for (i=0;i<send_size;i++) {
2818                 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2819               }
2820               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2821             }
2822             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2823             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2824               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2825             }
2826             j = 0;
2827             for (i=0;i<count_recv;i++) {
2828               ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
2829               localsizes2[i] = ii*ii;
2830               localdispl2[i] = j;
2831               j += localsizes2[i];
2832               jj = pcbddc->local_primal_displacements[i];
2833               /* it counts the coarse subdomains sharing the coarse node */
2834               for (k=0;k<ii;k++) {
2835                 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
2836               }
2837             }
2838             /* temp_coarse_mat_vals used to store matrix values to be received */
2839             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2840             /* evaluate how many values I will insert in coarse mat */
2841             ins_local_primal_size = 0;
2842             for (i=0;i<coarse_size;i++) {
2843               if (aux_ins_indices[i]) {
2844                 ins_local_primal_size++;
2845               }
2846             }
2847             /* evaluate indices I will insert in coarse mat */
2848             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2849             j = 0;
2850             for(i=0;i<coarse_size;i++) {
2851               if(aux_ins_indices[i]) {
2852                 ins_local_primal_indices[j] = i;
2853                 j++;
2854               }
2855             }
2856             /* processes partecipating in coarse problem receive matrix data from their friends */
2857             for (i=0;i<count_recv;i++) {
2858               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2859             }
2860             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2861               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2862               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2863             }
2864             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2865             /* nonzeros */
2866             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2867             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2868             /* use aux_ins_indices to realize a global to local mapping */
2869             j=0;
2870             for(i=0;i<coarse_size;i++){
2871               if(aux_ins_indices[i]==0){
2872                 aux_ins_indices[i]=-1;
2873               } else {
2874                 aux_ins_indices[i]=j;
2875                 j++;
2876               }
2877             }
2878             for (i=0;i<count_recv;i++) {
2879               j = pcbddc->local_primal_sizes[ranks_recv[i]];
2880               for (k=0;k<j;k++) {
2881                 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
2882               }
2883             }
2884             /* check */
2885             for (i=0;i<ins_local_primal_size;i++) {
2886               if (dnz[i] > ins_local_primal_size) {
2887                 dnz[i] = ins_local_primal_size;
2888               }
2889             }
2890             ierr = PetscFree(requests);CHKERRQ(ierr);
2891             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2892             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2893           }
2894           /* create local to global mapping needed by coarse MATIS */
2895           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2896           coarse_comm = prec_comm;
2897           active_rank = rank_prec_comm;
2898           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2899           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2900           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2901         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2902           /* arrays for values insertion */
2903           ins_local_primal_size = pcbddc->local_primal_size;
2904           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2905           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2906           for (j=0;j<ins_local_primal_size;j++){
2907             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2908             for (i=0;i<ins_local_primal_size;i++) {
2909               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2910             }
2911           }
2912         }
2913         break;
2914 
2915     }
2916 
2917     case(GATHERS_BDDC):
2918       {
2919 
2920         PetscMPIInt mysize,mysize2;
2921         PetscMPIInt *send_buffer;
2922 
2923         if (rank_prec_comm==active_rank) {
2924           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2925           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
2926           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2927           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2928           /* arrays for values insertion */
2929       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2930           localdispl2[0]=0;
2931       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2932           j=0;
2933       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2934           ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2935         }
2936 
2937         mysize=pcbddc->local_primal_size;
2938         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2939         ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2940     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2941 
2942         if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2943           ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2944           ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
2945         } else {
2946           ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
2947           ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
2948         }
2949         ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2950         break;
2951       }/* switch on coarse problem and communications associated with finished */
2952   }
2953 
2954   /* Now create and fill up coarse matrix */
2955   if ( rank_prec_comm == active_rank ) {
2956 
2957     Mat matis_coarse_local_mat;
2958 
2959     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2960       ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
2961       ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2962       ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2963       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2964       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2965       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2966       ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2967       ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2968     } else {
2969       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
2970       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2971       ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2972       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2973       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2974       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2975       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2976       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2977     }
2978     /* preallocation */
2979     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2980 
2981       PetscInt lrows,lcols,bs;
2982 
2983       ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2984       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2985       ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr);
2986 
2987       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2988 
2989         Vec         vec_dnz,vec_onz;
2990         PetscScalar *my_dnz,*my_onz,*array;
2991         PetscInt    *mat_ranges,*row_ownership;
2992         PetscInt    coarse_index_row,coarse_index_col,owner;
2993 
2994         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
2995         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2996         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
2997         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
2998         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2999 
3000         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
3001         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
3002         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3003         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3004 
3005         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
3006         ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
3007         for (i=0;i<size_prec_comm;i++) {
3008           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
3009             row_ownership[j]=i;
3010           }
3011         }
3012 
3013         for (i=0;i<pcbddc->local_primal_size;i++) {
3014           coarse_index_row = pcbddc->local_primal_indices[i];
3015           owner = row_ownership[coarse_index_row];
3016           for (j=i;j<pcbddc->local_primal_size;j++) {
3017             owner = row_ownership[coarse_index_row];
3018             coarse_index_col = pcbddc->local_primal_indices[j];
3019             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3020               my_dnz[i] += 1.0;
3021             } else {
3022               my_onz[i] += 1.0;
3023             }
3024             if (i != j) {
3025               owner = row_ownership[coarse_index_col];
3026               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3027                 my_dnz[j] += 1.0;
3028               } else {
3029                 my_onz[j] += 1.0;
3030               }
3031             }
3032           }
3033         }
3034         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3035         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3036         if (pcbddc->local_primal_size) {
3037           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3038           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3039         }
3040         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3041         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3042         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3043         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3044         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3045         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3046         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3047 
3048         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3049         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3050         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3051 
3052         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3053         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3054         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3055         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3056         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3057         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3058       } else {
3059         for (k=0;k<size_prec_comm;k++){
3060           offset=pcbddc->local_primal_displacements[k];
3061           offset2=localdispl2[k];
3062           ins_local_primal_size = pcbddc->local_primal_sizes[k];
3063           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3064           for (j=0;j<ins_local_primal_size;j++){
3065             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3066           }
3067           for (j=0;j<ins_local_primal_size;j++) {
3068             ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr);
3069           }
3070           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3071         }
3072       }
3073 
3074       /* check */
3075       for (i=0;i<lrows;i++) {
3076         if (dnz[i]>lcols) dnz[i]=lcols;
3077         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
3078       }
3079       ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr);
3080       ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3081       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3082     } else {
3083       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3084       ierr = PetscFree(dnz);CHKERRQ(ierr);
3085     }
3086     /* insert values */
3087     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3088       ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3089     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3090       if (pcbddc->coarsening_ratio == 1) {
3091         ins_coarse_mat_vals = coarse_submat_vals;
3092         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3093       } else {
3094         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3095         for (k=0;k<pcbddc->replicated_primal_size;k++) {
3096           offset = pcbddc->local_primal_displacements[k];
3097           offset2 = localdispl2[k];
3098           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
3099           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3100           for (j=0;j<ins_local_primal_size;j++){
3101             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3102           }
3103           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3104           ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3105           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3106         }
3107       }
3108       ins_local_primal_indices = 0;
3109       ins_coarse_mat_vals = 0;
3110     } else {
3111       for (k=0;k<size_prec_comm;k++){
3112         offset=pcbddc->local_primal_displacements[k];
3113         offset2=localdispl2[k];
3114         ins_local_primal_size = pcbddc->local_primal_sizes[k];
3115         ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3116         for (j=0;j<ins_local_primal_size;j++){
3117           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3118         }
3119         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3120         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3121         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3122       }
3123       ins_local_primal_indices = 0;
3124       ins_coarse_mat_vals = 0;
3125     }
3126     ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3127     ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3128     /* symmetry of coarse matrix */
3129     if (issym) {
3130       ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3131     }
3132     ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3133   }
3134 
3135   /* create loc to glob scatters if needed */
3136   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3137      IS local_IS,global_IS;
3138      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3139      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3140      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3141      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3142      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3143   }
3144 
3145   /* free memory no longer needed */
3146   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3147   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3148   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3149   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3150   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3151   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3152 
3153   /* Compute coarse null space */
3154   CoarseNullSpace = 0;
3155   if (pcbddc->NullSpace) {
3156     ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr);
3157   }
3158 
3159   /* KSP for coarse problem */
3160   if (rank_prec_comm == active_rank) {
3161     PetscBool isbddc=PETSC_FALSE;
3162 
3163     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3164     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3165     ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3166     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3167     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3168     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3169     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3170     /* Allow user's customization */
3171     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3172     /* Set Up PC for coarse problem BDDC */
3173     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3174       i = pcbddc->current_level+1;
3175       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3176       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3177       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3178       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3179       if (CoarseNullSpace) {
3180         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3181       }
3182       if (dbg_flag) {
3183         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3184         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3185       }
3186     } else {
3187       if (CoarseNullSpace) {
3188         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3189       }
3190     }
3191     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3192     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3193 
3194     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3195     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3196     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3197     if (j == 1) {
3198       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3199       if (isbddc) {
3200         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3201       }
3202     }
3203   }
3204   /* Check coarse problem if requested */
3205   if ( dbg_flag && rank_prec_comm == active_rank ) {
3206     KSP check_ksp;
3207     PC  check_pc;
3208     Vec check_vec;
3209     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3210     KSPType check_ksp_type;
3211 
3212     /* Create ksp object suitable for extreme eigenvalues' estimation */
3213     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3214     ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3215     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3216     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3217       if (issym) check_ksp_type = KSPCG;
3218       else check_ksp_type = KSPGMRES;
3219       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3220     } else {
3221       check_ksp_type = KSPPREONLY;
3222     }
3223     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3224     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3225     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3226     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3227     /* create random vec */
3228     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3229     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3230     if (CoarseNullSpace) {
3231       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3232     }
3233     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3234     /* solve coarse problem */
3235     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3236     if (CoarseNullSpace) {
3237       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3238     }
3239     /* check coarse problem residual error */
3240     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3241     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3242     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3243     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3244     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3245     /* get eigenvalue estimation if inexact */
3246     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3247       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3248       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3249       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3250       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3251     }
3252     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3253     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3254     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3255   }
3256   if (dbg_flag) {
3257     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3258   }
3259   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3260 
3261   PetscFunctionReturn(0);
3262 }
3263 
3264