xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 4641a718e2ea6ffa1549c46d4bdd764ae72722d8)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
10   PetscErrorCode    ierr;
11 
12   PetscFunctionBegin;
13   /* Compute matrix after change of basis and extract local submatrices */
14   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
15 
16   /* Allocate needed vectors */
17   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
18 
19   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
20   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
21 
22   /* Setup local solvers ksp_D and ksp_R */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /* setup local correction and local part of coarse basis */
31   ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "PCBDDCSetLevel"
37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
38 {
39   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
40 
41   PetscFunctionBegin;
42   pcbddc->current_level=level;
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "PCBDDCResetCustomization"
48 PetscErrorCode PCBDDCResetCustomization(PC pc)
49 {
50   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
51   PetscInt       i;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
60   for (i=0;i<pcbddc->n_ISForDofs;i++) {
61     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
62   }
63   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCCreateWorkVectors"
118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
128   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
129   local_primal_size = n_constraints+n_vertices;
130   n_R = pcis->n-n_vertices;
131   /* local work vectors */
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
134   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
135   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
136   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
141   if (n_constraints) {
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
145   }
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc)
152 {
153   PetscErrorCode         ierr;
154   /* pointers to pcis and pcbddc */
155   PC_IS*                 pcis = (PC_IS*)pc->data;
156   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
157   /* submatrices of local problem */
158   Mat                    A_RV,A_VR,A_VV;
159   /* working matrices */
160   Mat                    M1,M2,M3,C_CR;
161   /* working vectors */
162   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
163   /* additional working stuff */
164   IS                     is_aux;
165   ISLocalToGlobalMapping BtoNmap;
166   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
167   const PetscScalar      *array,*row_cmat_values;
168   const PetscInt         *row_cmat_indices,*idx_R_local;
169   PetscInt               *vertices,*idx_V_B,*auxindices;
170   PetscInt               n_vertices,n_constraints,size_of_constraint;
171   PetscInt               i,j,n_R,n_D,n_B;
172   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
173   /* Vector and matrix types */
174   VecType                impVecType;
175   MatType                impMatType;
176   /* some shortcuts to scalars */
177   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
178   /* for debugging purposes */
179   PetscReal              *coarsefunctions_errors,*constraints_errors;
180 
181   PetscFunctionBegin;
182   /* get number of vertices and their local indices */
183   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
184   n_constraints = pcbddc->local_primal_size-n_vertices;
185   /* Set Non-overlapping dimensions */
186   n_B = pcis->n_B; n_D = pcis->n - n_B;
187   n_R = pcis->n-n_vertices;
188 
189   /* Set types for local objects needed by BDDC precondtioner */
190   impMatType = MATSEQDENSE;
191   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
192 
193   /* Allocating some extra storage just to be safe */
194   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
195   for (i=0;i<pcis->n;i++) auxindices[i]=i;
196 
197   /* vertices in boundary numbering */
198   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
199   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
200   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
201   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
202   if (i != n_vertices) {
203     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
204   }
205 
206   /* some work vectors on vertices and/or constraints */
207   if (n_vertices) {
208     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
209     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
210     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
211     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
212   }
213   if (n_constraints) {
214     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
215     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
216   }
217 
218   /* Precompute stuffs needed for preprocessing and application of BDDC*/
219   if (n_constraints) {
220     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
221     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
222     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
223     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
224 
225     /* Extract constraints on R nodes: C_{CR}  */
226     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
227     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
228     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
229 
230     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
231     for (i=0;i<n_constraints;i++) {
232       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
233       /* Get row of constraint matrix in R numbering */
234       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
235       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
237       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
238       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
239       /* Solve for row of constraint matrix in R numbering */
240       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
241       /* Set values in local_auxmat2 */
242       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
243       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
244       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
245     }
246     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
249 
250     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
251     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
252     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
253     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
254     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
255     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
256     ierr = MatSetUp(M1);CHKERRQ(ierr);
257     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
258     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
259     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
260     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
261     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
262     ierr = MatDestroy(&M2);CHKERRQ(ierr);
263     ierr = MatDestroy(&M3);CHKERRQ(ierr);
264     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
265     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
266   }
267 
268   /* Get submatrices from subdomain matrix */
269   if (n_vertices) {
270     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
271     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
272     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
273     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
274     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
275   }
276 
277   /* Matrix of coarse basis functions (local) */
278   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
279   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
280   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
281   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
282   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
283     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
284     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
285     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
286     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
287   }
288 
289   if (pcbddc->dbg_flag) {
290     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
291     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
292     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
293   }
294   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
295   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
296 
297   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
298 
299   /* vertices */
300   for (i=0;i<n_vertices;i++) {
301     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
302     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
303     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
304     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
305     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
306     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
307     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
308     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
309     if (n_constraints) {
310       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
311       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
313     }
314     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
315     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
316 
317     /* Set values in coarse basis function and subdomain part of coarse_mat */
318     /* coarse basis functions */
319     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
320     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
322     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
323     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
324     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
325     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
326     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
327       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
328       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
329       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
330       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
331       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
332     }
333     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
334     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
335     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
336     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
337     if (n_constraints) {
338       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
339       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
340       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
341     }
342 
343     /* check */
344     if (pcbddc->dbg_flag) {
345       /* assemble subdomain vector on local nodes */
346       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
350       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
351       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
352       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
353       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
354       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
355       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
356       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
357       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
358       if (n_constraints) {
359         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
360         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
361         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
362       }
363       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
364       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
365       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
366       /* check saddle point solution */
367       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
368       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
369       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
370       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
371       /* shift by the identity matrix */
372       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
373       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
374       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
376     }
377   }
378 
379   /* constraints */
380   for (i=0;i<n_constraints;i++) {
381     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
382     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
383     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
384     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
385     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
386     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
387     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
388     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     if (n_vertices) {
390       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     }
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     j = i+n_vertices; /* don't touch this! */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     if (n_vertices) {
410       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     }
414     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417 
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       if (n_vertices) {
429         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       }
433       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
449     }
450   }
451   /* call assembling routines for local coarse basis */
452   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
455     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   }
458 
459   /* compute other basis functions for non-symmetric problems */
460   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
461   if (!setsym || (setsym && !issym)) {
462     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
463     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
464     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
465     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
466     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
467       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
468       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
469       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
470       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
471     }
472     for (i=0;i<pcbddc->local_primal_size;i++) {
473       if (n_constraints) {
474         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
475         for (j=0;j<n_constraints;j++) {
476           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
477         }
478         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
479         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
480       }
481       if (i<n_vertices) {
482         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
483         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
484         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
485         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
486         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
487         if (n_constraints) {
488           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
489         }
490       } else {
491         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
492       }
493       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
494       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
495       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
498       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
500       if (i<n_vertices) {
501         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
502       }
503       if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
504         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
507         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
509       }
510 
511       if (pcbddc->dbg_flag) {
512         /* assemble subdomain vector on nodes */
513         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
514         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
515         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
516         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
517         if (i<n_vertices) {
518           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
519         }
520         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
521         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
522         /* assemble subdomain vector of lagrange multipliers */
523         for (j=0;j<pcbddc->local_primal_size;j++) {
524           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
525         }
526         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
527         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
528         /* check saddle point solution */
529         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
530         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
531         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
532         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
533         /* shift by the identity matrix */
534         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
535         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
536         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
537         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
538       }
539     }
540     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
541     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
542     if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
543       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
544       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     }
546   }
547   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
548   /* Checking coarse_sub_mat and coarse basis functios */
549   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
550   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
551   if (pcbddc->dbg_flag) {
552     Mat         coarse_sub_mat;
553     Mat         AUXMAT,TM1,TM2,TM3,TM4;
554     Mat         coarse_phi_D,coarse_phi_B;
555     Mat         coarse_psi_D,coarse_psi_B;
556     Mat         A_II,A_BB,A_IB,A_BI;
557     MatType     checkmattype=MATSEQAIJ;
558     PetscReal   real_value;
559 
560     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
561     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
562     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
563     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
564     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
565     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
566     if (pcbddc->coarse_psi_B) {
567       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
568       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
569     }
570     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
571 
572     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
573     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
574     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
575     if (pcbddc->coarse_psi_B) {
576       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
577       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
578       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
579       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
580       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
581       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
582       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
583       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
584       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
585       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
586       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
587       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
588     } else {
589       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
590       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
591       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
592       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
593       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
594       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597     }
598     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
599     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
600     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
601     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
602     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
604     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
605     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
606     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
607     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
608     for (i=0;i<pcbddc->local_primal_size;i++) {
609       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
610     }
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
614     }
615     if (pcbddc->coarse_psi_B) {
616       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
617       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
618         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
619       }
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
623       }
624     }
625     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
626     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
627     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
628     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
629     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
630     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
631     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
632     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
633     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
634     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
635     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
636     if (pcbddc->coarse_psi_B) {
637       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
638       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
639     }
640     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
641     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
642     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
643     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
644   }
645   /* free memory */
646   if (n_vertices) {
647     ierr = PetscFree(vertices);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
649     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
652     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
653   }
654   if (n_constraints) {
655     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
656     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
657     ierr = MatDestroy(&M1);CHKERRQ(ierr);
658     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
659   }
660   ierr = PetscFree(auxindices);CHKERRQ(ierr);
661   /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */
662   ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
663   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
670 {
671   PC_IS*            pcis = (PC_IS*)(pc->data);
672   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
673   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
674   /* manage repeated solves */
675   MatReuse          reuse;
676   MatStructure      matstruct;
677   PetscErrorCode    ierr;
678 
679   PetscFunctionBegin;
680   /* get mat flags */
681   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
682   reuse = MAT_INITIAL_MATRIX;
683   if (pc->setupcalled) {
684     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
685     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
686     if (matstruct == SAME_NONZERO_PATTERN) {
687       reuse = MAT_REUSE_MATRIX;
688     } else {
689       reuse = MAT_INITIAL_MATRIX;
690     }
691   }
692   if (reuse == MAT_INITIAL_MATRIX) {
693     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
694     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
695     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
696     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
698   }
699 
700   /* transform local matrices if needed */
701   if (pcbddc->use_change_of_basis) {
702     Mat         change_mat_all;
703     PetscScalar *row_cmat_values;
704     PetscInt    *row_cmat_indices;
705     PetscInt    *nnz,*is_indices,*temp_indices;
706     PetscInt    i,j,k,n_D,n_B;
707 
708     /* Get Non-overlapping dimensions */
709     n_B = pcis->n_B;
710     n_D = pcis->n-n_B;
711 
712     /* compute nonzero structure of change of basis on all local nodes */
713     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
714     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
715     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
716     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     k=1;
719     for (i=0;i<n_B;i++) {
720       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
721       nnz[is_indices[i]]=j;
722       if (k < j) k = j;
723       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724     }
725     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     /* assemble change of basis matrix on the whole set of local dofs */
727     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
728     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
729     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
730     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
731     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     for (i=0;i<n_D;i++) {
734       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
735     }
736     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
738     for (i=0;i<n_B;i++) {
739       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
740       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
741       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
742       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743     }
744     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
746     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
747     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
748     if (i==1) {
749       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
750     } else {
751       Mat work_mat;
752       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
753       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
754       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
755     }
756     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
757     ierr = PetscFree(nnz);CHKERRQ(ierr);
758     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
759   } else {
760     /* without change of basis, the local matrix is unchanged */
761     if (!pcbddc->local_mat) {
762       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
763       pcbddc->local_mat = matis->A;
764     }
765   }
766 
767   /* get submatrices */
768   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
769   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
770   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
778 {
779   PC_IS*         pcis = (PC_IS*)(pc->data);
780   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
781   IS             is_aux1,is_aux2;
782   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
783   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
784   PetscBT        bitmask;
785   PetscErrorCode ierr;
786 
787   PetscFunctionBegin;
788   /* Set Non-overlapping dimensions */
789   n_B = pcis->n_B; n_D = pcis->n - n_B;
790   /* get vertex indices from constraint matrix */
791   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
792   /* Set number of constraints */
793   n_constraints = pcbddc->local_primal_size-n_vertices;
794   /* create auxiliary bitmask */
795   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
796   for (i=0;i<n_vertices;i++) {
797     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
798   }
799   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
800   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
801   for (i=0, n_R=0; i<pcis->n; i++) {
802     if (!PetscBTLookup(bitmask,i)) {
803       idx_R_local[n_R] = i;
804       n_R++;
805     }
806   }
807   ierr = PetscFree(vertices);CHKERRQ(ierr);
808   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
809 
810   /* print some info if requested */
811   if (pcbddc->dbg_flag) {
812     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
813     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
814     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
815     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
816     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
818     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
819   }
820 
821   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
822   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
823   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
824   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825   for (i=0; i<n_D; i++) {
826     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
827   }
828   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
829   for (i=0, j=0; i<n_R; i++) {
830     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
831       aux_array1[j++] = i;
832     }
833   }
834   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
835   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
836   for (i=0, j=0; i<n_B; i++) {
837     if (!PetscBTLookup(bitmask,is_indices[i])) {
838       aux_array2[j++] = i;
839     }
840   }
841   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
842   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
843   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
844   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
845   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
846 
847   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
848     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
849     for (i=0, j=0; i<n_R; i++) {
850       if (PetscBTLookup(bitmask,idx_R_local[i])) {
851         aux_array1[j++] = i;
852       }
853     }
854     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
855     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
856     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
857   }
858   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
864 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
865 {
866   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
867 
868   PetscFunctionBegin;
869   pcbddc->use_exact_dirichlet=use;
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
875 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
876 {
877   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
878   PC_IS          *pcis = (PC_IS*)pc->data;
879   PC             pc_temp;
880   Mat            A_RR;
881   Vec            vec1,vec2,vec3;
882   MatStructure   matstruct;
883   PetscScalar    m_one = -1.0;
884   PetscReal      value;
885   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
886   PetscErrorCode ierr;
887 
888   PetscFunctionBegin;
889   /* Creating PC contexts for local Dirichlet and Neumann problems */
890   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
891 
892   /* DIRICHLET PROBLEM */
893   /* Matrix for Dirichlet problem is pcis->A_II */
894   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
895   if (!pcbddc->ksp_D) { /* create object if not yet build */
896     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
897     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
898     /* default */
899     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
900     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
901     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
902     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
903     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
904   }
905   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
906   /* Allow user's customization */
907   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
908   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
909   if (!n_D) {
910     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
911     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
912   }
913   /* Set Up KSP for Dirichlet problem of BDDC */
914   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
915   /* set ksp_D into pcis data */
916   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
917   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
918   pcis->ksp_D = pcbddc->ksp_D;
919 
920   /* NEUMANN PROBLEM */
921   /* Matrix for Neumann problem is A_RR -> we need to create it */
922   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
923   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
924   if (!pcbddc->ksp_R) { /* create object if not yet build */
925     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
926     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
927     /* default */
928     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
929     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
930     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
931     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
932     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
933   }
934   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
935   /* Allow user's customization */
936   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
937   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
938   if (!n_R) {
939     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
940     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
941   }
942   /* Set Up KSP for Neumann problem of BDDC */
943   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
944 
945   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
946 
947   /* Dirichlet */
948   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
949   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
950   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
951   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
952   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
953   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
954   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
955   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
956   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
957   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
958   /* need to be adapted? */
959   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
960   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
961   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
962   /* print info */
963   if (pcbddc->dbg_flag) {
964     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
965     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
966     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
967     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
968     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
969   }
970   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
971     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
972   }
973 
974   /* Neumann */
975   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
976   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
977   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
978   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
979   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
980   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
981   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
982   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
983   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
984   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
985   /* need to be adapted? */
986   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
987   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
988   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
989   /* print info */
990   if (pcbddc->dbg_flag) {
991     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
992     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
993   }
994   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
995     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
996   }
997 
998   /* free Neumann problem's matrix */
999   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1005 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1006 {
1007   PetscErrorCode ierr;
1008   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1009 
1010   PetscFunctionBegin;
1011   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1012   if (pcbddc->local_auxmat1) {
1013     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1014     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015   }
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1021 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1022 {
1023   PetscErrorCode ierr;
1024   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1025   PC_IS*            pcis = (PC_IS*)  (pc->data);
1026   const PetscScalar zero = 0.0;
1027 
1028   PetscFunctionBegin;
1029   /* Application of PHI^T (or PSI^T)  */
1030   if (pcbddc->coarse_psi_B) {
1031     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1032     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1033   } else {
1034     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1035     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1036   }
1037   /* Scatter data of coarse_rhs */
1038   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1039   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040 
1041   /* Local solution on R nodes */
1042   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1043   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045   if (pcbddc->inexact_prec_type) {
1046     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   }
1049   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1050   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1051   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053   if (pcbddc->inexact_prec_type) {
1054     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   }
1057 
1058   /* Coarse solution */
1059   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1060   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1061     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1062   }
1063   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1065 
1066   /* Sum contributions from two levels */
1067   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1068   if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1074 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1075 {
1076   PetscErrorCode ierr;
1077   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1078 
1079   PetscFunctionBegin;
1080   switch (pcbddc->coarse_communications_type) {
1081     case SCATTERS_BDDC:
1082       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1083       break;
1084     case GATHERS_BDDC:
1085       break;
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1093 {
1094   PetscErrorCode ierr;
1095   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1096   PetscScalar*   array_to;
1097   PetscScalar*   array_from;
1098   MPI_Comm       comm;
1099   PetscInt       i;
1100 
1101   PetscFunctionBegin;
1102   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1103   switch (pcbddc->coarse_communications_type) {
1104     case SCATTERS_BDDC:
1105       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1106       break;
1107     case GATHERS_BDDC:
1108       if (vec_from) {
1109         ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr);
1110       }
1111       if (vec_to) {
1112         ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr);
1113       }
1114       switch(pcbddc->coarse_problem_type){
1115         case SEQUENTIAL_BDDC:
1116           if (smode == SCATTER_FORWARD) {
1117             ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1118             if (vec_to) {
1119               if (imode == ADD_VALUES) {
1120                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1121                   array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1122                 }
1123               } else {
1124                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1125                   array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1126                 }
1127               }
1128             }
1129           } else {
1130             if (vec_from) {
1131               if (imode == ADD_VALUES) {
1132                 MPI_Comm vec_from_comm;
1133                 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr);
1134                 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type);
1135               }
1136               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1137                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
1138               }
1139             }
1140             ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1141           }
1142           break;
1143         case REPLICATED_BDDC:
1144           if (smode == SCATTER_FORWARD) {
1145             ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
1146             if (imode == ADD_VALUES) {
1147               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1148                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1149               }
1150             } else {
1151               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1152                 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1153               }
1154             }
1155           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
1156             if (imode == ADD_VALUES) {
1157               for (i=0;i<pcbddc->local_primal_size;i++) {
1158                 array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
1159               }
1160             } else {
1161               for (i=0;i<pcbddc->local_primal_size;i++) {
1162                 array_to[i]=array_from[pcbddc->local_primal_indices[i]];
1163               }
1164             }
1165           }
1166           break;
1167         case MULTILEVEL_BDDC:
1168           break;
1169         case PARALLEL_BDDC:
1170           break;
1171       }
1172       if (vec_from) {
1173         ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr);
1174       }
1175       if (vec_to) {
1176         ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr);
1177       }
1178       break;
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 /* uncomment for testing purposes */
1184 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1185 #undef __FUNCT__
1186 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1187 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1188 {
1189   PetscErrorCode    ierr;
1190   PC_IS*            pcis = (PC_IS*)(pc->data);
1191   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1192   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1193   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1194   MatType           impMatType=MATSEQAIJ;
1195   /* one and zero */
1196   PetscScalar       one=1.0,zero=0.0;
1197   /* space to store constraints and their local indices */
1198   PetscScalar       *temp_quadrature_constraint;
1199   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1200   /* iterators */
1201   PetscInt          i,j,k,total_counts,temp_start_ptr;
1202   /* stuff to store connected components stored in pcbddc->mat_graph */
1203   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1204   PetscInt          n_ISForFaces,n_ISForEdges;
1205   PetscBool         get_faces,get_edges,get_vertices;
1206   /* near null space stuff */
1207   MatNullSpace      nearnullsp;
1208   const Vec         *nearnullvecs;
1209   Vec               *localnearnullsp;
1210   PetscBool         nnsp_has_cnst;
1211   PetscInt          nnsp_size;
1212   PetscScalar       *array;
1213   /* BLAS integers */
1214   PetscBLASInt      lwork,lierr;
1215   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1216   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1217   /* LAPACK working arrays for SVD or POD */
1218   PetscBool         skip_lapack;
1219   PetscScalar       *work;
1220   PetscReal         *singular_vals;
1221 #if defined(PETSC_USE_COMPLEX)
1222   PetscReal         *rwork;
1223 #endif
1224 #if defined(PETSC_MISSING_LAPACK_GESVD)
1225   PetscBLASInt      Blas_one_2=1;
1226   PetscScalar       *temp_basis,*correlation_mat;
1227 #else
1228   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1229   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1230 #endif
1231   /* change of basis */
1232   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1233   PetscBool         boolforchange;
1234   PetscBT           touched,change_basis;
1235   /* auxiliary stuff */
1236   PetscInt          *nnz,*is_indices,*local_to_B;
1237   /* some quantities */
1238   PetscInt          n_vertices,total_primal_vertices;
1239   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1240 
1241 
1242   PetscFunctionBegin;
1243   /* Get index sets for faces, edges and vertices from graph */
1244   get_faces = PETSC_TRUE;
1245   get_edges = PETSC_TRUE;
1246   get_vertices = PETSC_TRUE;
1247   if (pcbddc->vertices_flag) {
1248     get_faces = PETSC_FALSE;
1249     get_edges = PETSC_FALSE;
1250   }
1251   if (pcbddc->constraints_flag) {
1252     get_vertices = PETSC_FALSE;
1253   }
1254   if (pcbddc->faces_flag) {
1255     get_edges = PETSC_FALSE;
1256   }
1257   if (pcbddc->edges_flag) {
1258     get_faces = PETSC_FALSE;
1259   }
1260   /* default */
1261   if (!get_faces && !get_edges && !get_vertices) {
1262     get_vertices = PETSC_TRUE;
1263   }
1264   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1265   /* print some info */
1266   if (pcbddc->dbg_flag) {
1267     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1268     i = 0;
1269     if (ISForVertices) {
1270       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1271     }
1272     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1273     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1274     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1275     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1276   }
1277   /* check if near null space is attached to global mat */
1278   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1279   if (nearnullsp) {
1280     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1281   } else { /* if near null space is not provided BDDC uses constants by default */
1282     nnsp_size = 0;
1283     nnsp_has_cnst = PETSC_TRUE;
1284   }
1285   /* get max number of constraints on a single cc */
1286   max_constraints = nnsp_size;
1287   if (nnsp_has_cnst) max_constraints++;
1288 
1289   /*
1290        Evaluate maximum storage size needed by the procedure
1291        - temp_indices will contain start index of each constraint stored as follows
1292        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1293        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1294        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1295                                                                                                                                                          */
1296   total_counts = n_ISForFaces+n_ISForEdges;
1297   total_counts *= max_constraints;
1298   n_vertices = 0;
1299   if (ISForVertices) {
1300     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1301   }
1302   total_counts += n_vertices;
1303   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1304   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1305   total_counts = 0;
1306   max_size_of_constraint = 0;
1307   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1308     if (i<n_ISForEdges) {
1309       used_IS = &ISForEdges[i];
1310     } else {
1311       used_IS = &ISForFaces[i-n_ISForEdges];
1312     }
1313     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1314     total_counts += j;
1315     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1316   }
1317   total_counts *= max_constraints;
1318   total_counts += n_vertices;
1319   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1320   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1321   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1322   /* local to boundary numbering */
1323   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1324   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1326   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1327   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1328   /* get local part of global near null space vectors */
1329   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1330   for (k=0;k<nnsp_size;k++) {
1331     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1332     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   }
1335 
1336   /* whether or not to skip lapack calls */
1337   skip_lapack = PETSC_TRUE;
1338   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1339 
1340   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1341   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1342     PetscScalar temp_work;
1343 #if defined(PETSC_MISSING_LAPACK_GESVD)
1344     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1345     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1346     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1347     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1348 #if defined(PETSC_USE_COMPLEX)
1349     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1350 #endif
1351     /* now we evaluate the optimal workspace using query with lwork=-1 */
1352     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1353     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1354     lwork = -1;
1355     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1356 #if !defined(PETSC_USE_COMPLEX)
1357     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1358 #else
1359     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1360 #endif
1361     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1362     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1363 #else /* on missing GESVD */
1364     /* SVD */
1365     PetscInt max_n,min_n;
1366     max_n = max_size_of_constraint;
1367     min_n = max_constraints;
1368     if (max_size_of_constraint < max_constraints) {
1369       min_n = max_size_of_constraint;
1370       max_n = max_constraints;
1371     }
1372     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1373 #if defined(PETSC_USE_COMPLEX)
1374     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1375 #endif
1376     /* now we evaluate the optimal workspace using query with lwork=-1 */
1377     lwork = -1;
1378     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1379     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1380     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1381     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1382 #if !defined(PETSC_USE_COMPLEX)
1383     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1384 #else
1385     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1386 #endif
1387     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1388     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1389 #endif /* on missing GESVD */
1390     /* Allocate optimal workspace */
1391     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1392     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1393   }
1394   /* Now we can loop on constraining sets */
1395   total_counts = 0;
1396   temp_indices[0] = 0;
1397   /* vertices */
1398   if (ISForVertices) {
1399     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1400     if (nnsp_has_cnst) { /* consider all vertices */
1401       for (i=0;i<n_vertices;i++) {
1402         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1403         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1404         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1405         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1406         total_counts++;
1407       }
1408     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1409       PetscBool used_vertex;
1410       for (i=0;i<n_vertices;i++) {
1411         used_vertex = PETSC_FALSE;
1412         k = 0;
1413         while (!used_vertex && k<nnsp_size) {
1414           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1415           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1416             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1417             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1418             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1419             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1420             total_counts++;
1421             used_vertex = PETSC_TRUE;
1422           }
1423           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1424           k++;
1425         }
1426       }
1427     }
1428     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1429     n_vertices = total_counts;
1430   }
1431 
1432   /* edges and faces */
1433   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1434     if (i<n_ISForEdges) {
1435       used_IS = &ISForEdges[i];
1436       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1437     } else {
1438       used_IS = &ISForFaces[i-n_ISForEdges];
1439       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1440     }
1441     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1442     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1443     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1444     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1445     /* change of basis should not be performed on local periodic nodes */
1446     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1447     if (nnsp_has_cnst) {
1448       PetscScalar quad_value;
1449       temp_constraints++;
1450       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1451       for (j=0;j<size_of_constraint;j++) {
1452         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1453         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1454         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1455       }
1456       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1457       if (boolforchange) {
1458         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1459       }
1460       total_counts++;
1461     }
1462     for (k=0;k<nnsp_size;k++) {
1463       PetscReal real_value;
1464       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1465       for (j=0;j<size_of_constraint;j++) {
1466         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1467         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1468         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1469       }
1470       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1471       /* check if array is null on the connected component */
1472       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1473       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1474       if (real_value > 0.0) { /* keep indices and values */
1475         temp_constraints++;
1476         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1477         if (boolforchange) {
1478           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1479         }
1480         total_counts++;
1481       }
1482     }
1483     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1484     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1485     if (!pcbddc->use_nnsp_true) {
1486       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1487 
1488 #if defined(PETSC_MISSING_LAPACK_GESVD)
1489       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1490          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1491          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1492             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1493             from that computed using LAPACKgesvd
1494          -> This is due to a different computation of eigenvectors in LAPACKheev
1495          -> The quality of the POD-computed basis will be the same */
1496       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1497       /* Store upper triangular part of correlation matrix */
1498       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1499       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1500       for (j=0;j<temp_constraints;j++) {
1501         for (k=0;k<j+1;k++) {
1502           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1503         }
1504       }
1505       /* compute eigenvalues and eigenvectors of correlation matrix */
1506       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1507       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1508 #if !defined(PETSC_USE_COMPLEX)
1509       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1510 #else
1511       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1512 #endif
1513       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1514       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1515       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1516       j=0;
1517       while (j < temp_constraints && singular_vals[j] < tol) j++;
1518       total_counts=total_counts-j;
1519       /* scale and copy POD basis into used quadrature memory */
1520       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1521       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1522       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1523       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1524       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1525       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1526       if (j<temp_constraints) {
1527         PetscInt ii;
1528         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1529         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1530         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1531         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1532         for (k=0;k<temp_constraints-j;k++) {
1533           for (ii=0;ii<size_of_constraint;ii++) {
1534             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1535           }
1536         }
1537       }
1538 #else  /* on missing GESVD */
1539       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1540       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1541       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1542       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1543 #if !defined(PETSC_USE_COMPLEX)
1544       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1545 #else
1546       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1547 #endif
1548       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1549       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1550       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1551       k = temp_constraints;
1552       if (k > size_of_constraint) k = size_of_constraint;
1553       j = 0;
1554       while (j < k && singular_vals[k-j-1] < tol) j++;
1555       total_counts = total_counts-temp_constraints+k-j;
1556 #endif /* on missing GESVD */
1557     }
1558   }
1559   /* free index sets of faces, edges and vertices */
1560   for (i=0;i<n_ISForFaces;i++) {
1561     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1562   }
1563   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1564   for (i=0;i<n_ISForEdges;i++) {
1565     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1566   }
1567   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1568   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1569 
1570   /* free workspace */
1571   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1572     ierr = PetscFree(work);CHKERRQ(ierr);
1573 #if defined(PETSC_USE_COMPLEX)
1574     ierr = PetscFree(rwork);CHKERRQ(ierr);
1575 #endif
1576     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1577 #if defined(PETSC_MISSING_LAPACK_GESVD)
1578     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1579     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1580 #endif
1581   }
1582   for (k=0;k<nnsp_size;k++) {
1583     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1584   }
1585   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1586 
1587   /* set quantities in pcbddc data structure */
1588   /* n_vertices defines the number of subdomain corners in the primal space */
1589   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1590   pcbddc->local_primal_size = total_counts;
1591   pcbddc->n_vertices = n_vertices;
1592   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1593 
1594   /* Create constraint matrix */
1595   /* The constraint matrix is used to compute the l2g map of primal dofs */
1596   /* so we need to set it up properly either with or without change of basis */
1597   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1598   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1599   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1600   /* array to compute a local numbering of constraints : vertices first then constraints */
1601   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1602   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1603   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1604   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1605   /* auxiliary stuff for basis change */
1606   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1607   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1608 
1609   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1610   total_primal_vertices=0;
1611   for (i=0;i<pcbddc->local_primal_size;i++) {
1612     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1613     if (size_of_constraint == 1) {
1614       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1615       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1616       aux_primal_minloc[total_primal_vertices]=0;
1617       total_primal_vertices++;
1618     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1619       PetscInt min_loc,min_index;
1620       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1621       /* find first untouched local node */
1622       k = 0;
1623       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1624       min_index = global_indices[k];
1625       min_loc = k;
1626       /* search the minimum among global nodes already untouched on the cc */
1627       for (k=1;k<size_of_constraint;k++) {
1628         /* there can be more than one constraint on a single connected component */
1629         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1630           min_index = global_indices[k];
1631           min_loc = k;
1632         }
1633       }
1634       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1635       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1636       aux_primal_minloc[total_primal_vertices]=min_loc;
1637       total_primal_vertices++;
1638     }
1639   }
1640   /* free workspace */
1641   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1642   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1643   /* permute indices in order to have a sorted set of vertices */
1644   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1645 
1646   /* nonzero structure of constraint matrix */
1647   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1648   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1649   j=total_primal_vertices;
1650   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1651     if (!PetscBTLookup(change_basis,i)) {
1652       nnz[j]=temp_indices[i+1]-temp_indices[i];
1653       j++;
1654     }
1655   }
1656   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1657   ierr = PetscFree(nnz);CHKERRQ(ierr);
1658   /* set values in constraint matrix */
1659   for (i=0;i<total_primal_vertices;i++) {
1660     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1661   }
1662   total_counts = total_primal_vertices;
1663   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1664     if (!PetscBTLookup(change_basis,i)) {
1665       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1666       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1667       total_counts++;
1668     }
1669   }
1670   /* assembling */
1671   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   /*
1674   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1675   */
1676   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1677   if (pcbddc->use_change_of_basis) {
1678     PetscBool qr_needed = PETSC_FALSE;
1679     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1680     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1681     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1682     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1683     /* work arrays */
1684     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1685     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1686     /* nonzeros per row */
1687     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1688       if (PetscBTLookup(change_basis,i)) {
1689         qr_needed = PETSC_TRUE;
1690         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1691         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1692       }
1693     }
1694     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1695     ierr = PetscFree(nnz);CHKERRQ(ierr);
1696     /* Set initial identity in the matrix */
1697     for (i=0;i<pcis->n_B;i++) {
1698       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1699     }
1700 
1701     /* Now we loop on the constraints which need a change of basis */
1702     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1703     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1704     /* Change of basis matrix T computed via QR decomposition of constraints */
1705     if (qr_needed) {
1706       /* dual and primal dofs on a single cc */
1707       PetscInt     dual_dofs,primal_dofs;
1708       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1709       PetscInt     primal_counter;
1710       /* working stuff for GEQRF */
1711       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1712       PetscBLASInt lqr_work;
1713       /* working stuff for UNGQR */
1714       PetscScalar  *gqr_work,lgqr_work_t;
1715       PetscBLASInt lgqr_work;
1716       /* working stuff for TRTRS */
1717       PetscScalar  *trs_rhs;
1718       PetscBLASInt Blas_NRHS;
1719       /* pointers for values insertion into change of basis matrix */
1720       PetscInt     *start_rows,*start_cols;
1721       PetscScalar  *start_vals;
1722       /* working stuff for values insertion */
1723       PetscBT      is_primal;
1724 
1725       /* space to store Q */
1726       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1727       /* first we issue queries for optimal work */
1728       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1729       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1730       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1731       lqr_work = -1;
1732       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1733       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1734       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1735       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1736       lgqr_work = -1;
1737       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1738       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1739       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1740       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1741       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1742       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1743       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1744       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1745       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1746       /* array to store scaling factors for reflectors */
1747       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1748       /* array to store rhs and solution of triangular solver */
1749       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1750       /* array to store whether a node is primal or not */
1751       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1752       for (i=0;i<total_primal_vertices;i++) {
1753         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1754       }
1755 
1756       /* allocating workspace for check */
1757       if (pcbddc->dbg_flag) {
1758         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1759         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1760         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1761       }
1762 
1763       /* loop on constraints and see whether or not they need a change of basis */
1764       /* -> using implicit ordering contained in temp_indices data */
1765       total_counts = pcbddc->n_vertices;
1766       primal_counter = total_counts;
1767       while (total_counts<pcbddc->local_primal_size) {
1768         primal_dofs = 1;
1769         if (PetscBTLookup(change_basis,total_counts)) {
1770           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1771           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1772             primal_dofs++;
1773           }
1774           /* get constraint info */
1775           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1776           dual_dofs = size_of_constraint-primal_dofs;
1777 
1778           /* copy quadrature constraints for change of basis check */
1779           if (pcbddc->dbg_flag) {
1780             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1781             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1782           }
1783 
1784           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1785           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1786 
1787           /* compute QR decomposition of constraints */
1788           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1789           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1790           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1791           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1792           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1793           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1794           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1795 
1796           /* explictly compute R^-T */
1797           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1798           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1799           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1800           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1801           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1802           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1803           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1804           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1805           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1806           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1807 
1808           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1809           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1810           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1811           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1812           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1813           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1814           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1815           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1816           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1817 
1818           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1819              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1820              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1821           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1822           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1823           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1824           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1825           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1826           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1827           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1828           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1829           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1830           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1831 
1832           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1833           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1834           /* insert cols for primal dofs */
1835           for (j=0;j<primal_dofs;j++) {
1836             start_vals = &qr_basis[j*size_of_constraint];
1837             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1838             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1839           }
1840           /* insert cols for dual dofs */
1841           for (j=0,k=0;j<dual_dofs;k++) {
1842             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1843               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1844               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1845               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1846               j++;
1847             }
1848           }
1849 
1850           /* check change of basis */
1851           if (pcbddc->dbg_flag) {
1852             PetscInt   ii,jj;
1853             PetscBool valid_qr=PETSC_TRUE;
1854             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1855             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1856             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1857             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1858             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1859             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1860             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1861             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1862             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1863             for (jj=0;jj<size_of_constraint;jj++) {
1864               for (ii=0;ii<primal_dofs;ii++) {
1865                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1866                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1867               }
1868             }
1869             if (!valid_qr) {
1870               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1871               for (jj=0;jj<size_of_constraint;jj++) {
1872                 for (ii=0;ii<primal_dofs;ii++) {
1873                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1874                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1875                   }
1876                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1877                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1878                   }
1879                 }
1880               }
1881             } else {
1882               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1883             }
1884           }
1885           /* increment primal counter */
1886           primal_counter += primal_dofs;
1887         } else {
1888           if (pcbddc->dbg_flag) {
1889             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1890           }
1891         }
1892         /* increment constraint counter total_counts */
1893         total_counts += primal_dofs;
1894       }
1895       if (pcbddc->dbg_flag) {
1896         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1897         ierr = PetscFree(work);CHKERRQ(ierr);
1898       }
1899       /* free workspace */
1900       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1901       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1902       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1903       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1904       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1905       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1906     }
1907     /* assembling */
1908     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1909     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1910     /*
1911     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1912     */
1913   }
1914   /* free workspace */
1915   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1916   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1917   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1918   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1919   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1920   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1921   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1922   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1928 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1929 {
1930   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1931   PC_IS       *pcis = (PC_IS*)pc->data;
1932   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1933   PetscInt    bs,ierr,i,vertex_size;
1934   PetscViewer viewer=pcbddc->dbg_viewer;
1935 
1936   PetscFunctionBegin;
1937   /* Init local Graph struct */
1938   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1939 
1940   /* Check validity of the csr graph passed in by the user */
1941   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1942     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1943   }
1944   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1945   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1946     Mat mat_adj;
1947     const PetscInt *xadj,*adjncy;
1948     PetscBool flg_row=PETSC_TRUE;
1949 
1950     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1951     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1952     if (!flg_row) {
1953       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1954     }
1955     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1956     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1957     if (!flg_row) {
1958       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1959     }
1960     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1961   }
1962 
1963   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1964   vertex_size = 1;
1965   if (!pcbddc->n_ISForDofs) {
1966     IS *custom_ISForDofs;
1967 
1968     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1969     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1970     for (i=0;i<bs;i++) {
1971       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1972     }
1973     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1974     /* remove my references to IS objects */
1975     for (i=0;i<bs;i++) {
1976       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1977     }
1978     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1979   } else { /* mat block size as vertex size (used for elasticity) */
1980     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1981   }
1982 
1983   /* Setup of Graph */
1984   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1985 
1986   /* Graph's connected components analysis */
1987   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1988 
1989   /* print some info to stdout */
1990   if (pcbddc->dbg_flag) {
1991     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1998 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[])
1999 {
2000   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2001   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   n = 0;
2006   vertices = 0;
2007   if (pcbddc->ConstraintMatrix) {
2008     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2009     for (i=0;i<local_primal_size;i++) {
2010       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2011       if (size_of_constraint == 1) n++;
2012       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2013     }
2014     if (vertices_idx) {
2015       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2016       n = 0;
2017       for (i=0;i<local_primal_size;i++) {
2018         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2019         if (size_of_constraint == 1) {
2020           vertices[n++]=row_cmat_indices[0];
2021         }
2022         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2023       }
2024     }
2025   }
2026   *n_vertices = n;
2027   if (vertices_idx) *vertices_idx = vertices;
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #undef __FUNCT__
2032 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2033 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[])
2034 {
2035   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2036   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2037   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2038   PetscBT        touched;
2039   PetscErrorCode ierr;
2040 
2041   PetscFunctionBegin;
2042   n = 0;
2043   constraints_index = 0;
2044   if (pcbddc->ConstraintMatrix) {
2045     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2046     max_size_of_constraint = 0;
2047     for (i=0;i<local_primal_size;i++) {
2048       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2049       if (size_of_constraint > 1) {
2050         n++;
2051       }
2052       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2053       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2054     }
2055     if (constraints_idx) {
2056       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2057       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2058       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2059       n = 0;
2060       for (i=0;i<local_primal_size;i++) {
2061         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2062         if (size_of_constraint > 1) {
2063           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2064           /* find first untouched local node */
2065           j = 0;
2066           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2067           min_index = row_cmat_global_indices[j];
2068           min_loc = j;
2069           /* search the minimum among nodes not yet touched on the connected component
2070              since there can be more than one constraint on a single cc */
2071           for (j=1;j<size_of_constraint;j++) {
2072             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2073               min_index = row_cmat_global_indices[j];
2074               min_loc = j;
2075             }
2076           }
2077           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2078           constraints_index[n++] = row_cmat_indices[min_loc];
2079         }
2080         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2081       }
2082       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2083       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2084     }
2085   }
2086   *n_constraints = n;
2087   if (constraints_idx) *constraints_idx = constraints_index;
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 /* the next two functions has been adapted from pcis.c */
2092 #undef __FUNCT__
2093 #define __FUNCT__ "PCBDDCApplySchur"
2094 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2095 {
2096   PetscErrorCode ierr;
2097   PC_IS          *pcis = (PC_IS*)(pc->data);
2098 
2099   PetscFunctionBegin;
2100   if (!vec2_B) { vec2_B = v; }
2101   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2102   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2103   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2104   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2105   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2106   PetscFunctionReturn(0);
2107 }
2108 
2109 #undef __FUNCT__
2110 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2111 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2112 {
2113   PetscErrorCode ierr;
2114   PC_IS          *pcis = (PC_IS*)(pc->data);
2115 
2116   PetscFunctionBegin;
2117   if (!vec2_B) { vec2_B = v; }
2118   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2119   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2120   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2121   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2122   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 #undef __FUNCT__
2127 #define __FUNCT__ "PCBDDCSubsetNumbering"
2128 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2129 {
2130   Vec            local_vec,global_vec;
2131   IS             seqis,paris;
2132   VecScatter     scatter_ctx;
2133   PetscScalar    *array;
2134   PetscInt       *temp_global_dofs;
2135   PetscScalar    globalsum;
2136   PetscInt       i,j,s;
2137   PetscInt       nlocals,first_index,old_index,max_local;
2138   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2139   PetscMPIInt    *dof_sizes,*dof_displs;
2140   PetscBool      first_found;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   /* mpi buffers */
2145   MPI_Comm_size(comm,&size_prec_comm);
2146   MPI_Comm_rank(comm,&rank_prec_comm);
2147   j = ( !rank_prec_comm ? size_prec_comm : 0);
2148   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2149   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2150   /* get maximum size of subset */
2151   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2152   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2153   max_local = 0;
2154   if (n_local_dofs) {
2155     max_local = temp_global_dofs[0];
2156     for (i=1;i<n_local_dofs;i++) {
2157       if (max_local < temp_global_dofs[i] ) {
2158         max_local = temp_global_dofs[i];
2159       }
2160     }
2161   }
2162   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2163   max_global++;
2164   max_local = 0;
2165   if (n_local_dofs) {
2166     max_local = local_dofs[0];
2167     for (i=1;i<n_local_dofs;i++) {
2168       if (max_local < local_dofs[i] ) {
2169         max_local = local_dofs[i];
2170       }
2171     }
2172   }
2173   max_local++;
2174   /* allocate workspace */
2175   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2176   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2177   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2178   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2179   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2180   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2181   /* create scatter */
2182   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2183   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2184   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2185   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2186   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2187   /* init array */
2188   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2189   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2190   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2191   if (local_dofs_mult) {
2192     for (i=0;i<n_local_dofs;i++) {
2193       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2194     }
2195   } else {
2196     for (i=0;i<n_local_dofs;i++) {
2197       array[local_dofs[i]]=1.0;
2198     }
2199   }
2200   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2201   /* scatter into global vec and get total number of global dofs */
2202   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2203   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2204   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2205   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2206   /* Fill global_vec with cumulative function for global numbering */
2207   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2208   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2209   nlocals = 0;
2210   first_index = -1;
2211   first_found = PETSC_FALSE;
2212   for (i=0;i<s;i++) {
2213     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2214       first_found = PETSC_TRUE;
2215       first_index = i;
2216     }
2217     nlocals += (PetscInt)PetscRealPart(array[i]);
2218   }
2219   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2220   if (!rank_prec_comm) {
2221     dof_displs[0]=0;
2222     for (i=1;i<size_prec_comm;i++) {
2223       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2224     }
2225   }
2226   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2227   if (first_found) {
2228     array[first_index] += (PetscScalar)nlocals;
2229     old_index = first_index;
2230     for (i=first_index+1;i<s;i++) {
2231       if (PetscRealPart(array[i]) > 0.0) {
2232         array[i] += array[old_index];
2233         old_index = i;
2234       }
2235     }
2236   }
2237   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2238   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2239   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2240   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2241   /* get global ordering of local dofs */
2242   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2243   if (local_dofs_mult) {
2244     for (i=0;i<n_local_dofs;i++) {
2245       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2246     }
2247   } else {
2248     for (i=0;i<n_local_dofs;i++) {
2249       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2250     }
2251   }
2252   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2253   /* free workspace */
2254   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2255   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2256   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2257   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2258   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2259   /* return pointer to global ordering of local dofs */
2260   *global_numbering_subset = temp_global_dofs;
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2266 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2267 {
2268   PetscInt       i,j;
2269   PetscScalar    *alphas;
2270   PetscErrorCode ierr;
2271 
2272   PetscFunctionBegin;
2273   /* this implements stabilized Gram-Schmidt */
2274   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2275   for (i=0;i<n;i++) {
2276     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2277     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2278     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2279   }
2280   ierr = PetscFree(alphas);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 /* BDDC requires metis 5.0.1 for multilevel */
2285 #if defined(PETSC_HAVE_METIS)
2286 #include "metis.h"
2287 #define MetisInt    idx_t
2288 #define MetisScalar real_t
2289 #endif
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment"
2293 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
2294 {
2295   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
2296   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2297   PC_IS     *pcis     = (PC_IS*)pc->data;
2298   MPI_Comm  prec_comm;
2299   MPI_Comm  coarse_comm;
2300 
2301   PetscInt  coarse_size;
2302 
2303   MatNullSpace CoarseNullSpace;
2304 
2305   /* common to all choiches */
2306   PetscScalar *temp_coarse_mat_vals;
2307   PetscScalar *ins_coarse_mat_vals;
2308   PetscInt    *ins_local_primal_indices;
2309   PetscMPIInt *localsizes2,*localdispl2;
2310   PetscMPIInt size_prec_comm;
2311   PetscMPIInt rank_prec_comm;
2312   PetscMPIInt active_rank=MPI_PROC_NULL;
2313   PetscMPIInt master_proc=0;
2314   PetscInt    ins_local_primal_size;
2315   /* specific to MULTILEVEL_BDDC */
2316   PetscMPIInt *ranks_recv=0;
2317   PetscMPIInt count_recv=0;
2318   PetscMPIInt rank_coarse_proc_send_to=-1;
2319   PetscMPIInt coarse_color = MPI_UNDEFINED;
2320   ISLocalToGlobalMapping coarse_ISLG;
2321   /* some other variables */
2322   PetscErrorCode ierr;
2323   MatType coarse_mat_type;
2324   PCType  coarse_pc_type;
2325   KSPType coarse_ksp_type;
2326   PC pc_temp;
2327   PetscInt i,j,k;
2328   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2329   /* verbose output viewer */
2330   PetscViewer viewer=pcbddc->dbg_viewer;
2331   PetscInt    dbg_flag=pcbddc->dbg_flag;
2332 
2333   PetscInt      offset,offset2;
2334   PetscMPIInt   im_active,active_procs;
2335   PetscInt      *dnz,*onz;
2336 
2337   PetscBool     setsym,issym=PETSC_FALSE;
2338 
2339   PetscFunctionBegin;
2340   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2341   ins_local_primal_indices = 0;
2342   ins_coarse_mat_vals      = 0;
2343   localsizes2              = 0;
2344   localdispl2              = 0;
2345   temp_coarse_mat_vals     = 0;
2346   coarse_ISLG              = 0;
2347 
2348   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2349   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2350   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2351 
2352   /* Assign global numbering to coarse dofs */
2353   {
2354     PetscInt     *auxlocal_primal,*aux_idx;
2355     PetscMPIInt  mpi_local_primal_size;
2356     PetscScalar  coarsesum,*array;
2357 
2358     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
2359 
2360     /* Construct needed data structures for message passing */
2361     j = 0;
2362     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2363       j = size_prec_comm;
2364     }
2365     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
2366     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2367     /* Gather local_primal_size information for all processes  */
2368     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2369       ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2370     } else {
2371       ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2372     }
2373     pcbddc->replicated_primal_size = 0;
2374     for (i=0; i<j; i++) {
2375       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
2376       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
2377     }
2378 
2379     /* This code fragment assumes that the number of local constraints per connected component
2380        is not greater than the number of nodes defined for the connected component
2381        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2382     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2383     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2384     ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2385     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2386     ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2387     ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2388     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2389     /* Compute number of coarse dofs */
2390     ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr);
2391 
2392     if (dbg_flag) {
2393       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2394       ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2395       ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr);
2396       ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2397       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2398       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
2399       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2400       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2401       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2402       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2403       ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2404       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2405       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2406       for (i=0;i<pcis->n;i++) {
2407         if (array[i] == 1.0) {
2408           ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr);
2409           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr);
2410         }
2411       }
2412       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2413       for (i=0;i<pcis->n;i++) {
2414         if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2415       }
2416       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2417       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2418       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2419       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2420       ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2421       ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr);
2422       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2423     }
2424     ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
2425   }
2426 
2427   if (dbg_flag) {
2428     ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr);
2429     if (dbg_flag > 1) {
2430       ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2432       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2433       for (i=0;i<pcbddc->local_primal_size;i++) {
2434         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
2435       }
2436       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2437     }
2438   }
2439 
2440   im_active = 0;
2441   if (pcis->n) im_active = 1;
2442   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2443 
2444   /* adapt coarse problem type */
2445 #if defined(PETSC_HAVE_METIS)
2446   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2447     if (pcbddc->current_level < pcbddc->max_levels) {
2448       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2449         if (dbg_flag) {
2450           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2451          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2452         }
2453         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2454       }
2455     } else {
2456       if (dbg_flag) {
2457         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2458         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2459       }
2460       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2461     }
2462   }
2463 #else
2464   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2465 #endif
2466 
2467   switch(pcbddc->coarse_problem_type){
2468 
2469     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2470     {
2471 #if defined(PETSC_HAVE_METIS)
2472       /* we need additional variables */
2473       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2474       MetisInt    *metis_coarse_subdivision;
2475       MetisInt    options[METIS_NOPTIONS];
2476       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2477       PetscMPIInt procs_jumps_coarse_comm;
2478       PetscMPIInt *coarse_subdivision;
2479       PetscMPIInt *total_count_recv;
2480       PetscMPIInt *total_ranks_recv;
2481       PetscMPIInt *displacements_recv;
2482       PetscMPIInt *my_faces_connectivity;
2483       PetscMPIInt *petsc_faces_adjncy;
2484       MetisInt    *faces_adjncy;
2485       MetisInt    *faces_xadj;
2486       PetscMPIInt *number_of_faces;
2487       PetscMPIInt *faces_displacements;
2488       PetscInt    *array_int;
2489       PetscMPIInt my_faces=0;
2490       PetscMPIInt total_faces=0;
2491       PetscInt    ranks_stretching_ratio;
2492 
2493       /* define some quantities */
2494       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2495       coarse_mat_type = MATIS;
2496       coarse_pc_type  = PCBDDC;
2497       coarse_ksp_type = KSPRICHARDSON;
2498 
2499       /* details of coarse decomposition */
2500       n_subdomains = active_procs;
2501       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2502       ranks_stretching_ratio = size_prec_comm/active_procs;
2503       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2504 
2505 #if 0
2506       PetscMPIInt *old_ranks;
2507       PetscInt    *new_ranks,*jj,*ii;
2508       MatPartitioning mat_part;
2509       IS coarse_new_decomposition,is_numbering;
2510       PetscViewer viewer_test;
2511       MPI_Comm    test_coarse_comm;
2512       PetscMPIInt test_coarse_color;
2513       Mat         mat_adj;
2514       /* Create new communicator for coarse problem splitting the old one */
2515       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2516          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2517       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2518       test_coarse_comm = MPI_COMM_NULL;
2519       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2520       if (im_active) {
2521         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2522         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2523         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2524         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2525         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2526         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2527         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2528         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2529         k = pcis->n_neigh-1;
2530         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2531         ii[0]=0;
2532         ii[1]=k;
2533         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2534         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2535         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2536         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2537         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2538         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2539         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2540         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2541         printf("Setting Nparts %d\n",n_parts);
2542         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2543         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2544         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2545         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2546         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2547         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2548         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2549         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2550         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2551         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2552         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2553         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2554         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2555       }
2556 #endif
2557 
2558       /* build CSR graph of subdomains' connectivity */
2559       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2560       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2561       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2562         for (j=0;j<pcis->n_shared[i];j++){
2563           array_int[ pcis->shared[i][j] ]+=1;
2564         }
2565       }
2566       for (i=1;i<pcis->n_neigh;i++){
2567         for (j=0;j<pcis->n_shared[i];j++){
2568           if (array_int[ pcis->shared[i][j] ] > 0 ){
2569             my_faces++;
2570             break;
2571           }
2572         }
2573       }
2574 
2575       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2576       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2577       my_faces=0;
2578       for (i=1;i<pcis->n_neigh;i++){
2579         for (j=0;j<pcis->n_shared[i];j++){
2580           if (array_int[ pcis->shared[i][j] ] > 0 ){
2581             my_faces_connectivity[my_faces]=pcis->neigh[i];
2582             my_faces++;
2583             break;
2584           }
2585         }
2586       }
2587       if (rank_prec_comm == master_proc) {
2588         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2589         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2590         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2591         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2592         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2593       }
2594       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2595       if (rank_prec_comm == master_proc) {
2596         faces_xadj[0]=0;
2597         faces_displacements[0]=0;
2598         j=0;
2599         for (i=1;i<size_prec_comm+1;i++) {
2600           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2601           if (number_of_faces[i-1]) {
2602             j++;
2603             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2604           }
2605         }
2606       }
2607       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2608       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2609       ierr = PetscFree(array_int);CHKERRQ(ierr);
2610       if (rank_prec_comm == master_proc) {
2611         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2612         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2613         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2614         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2615       }
2616 
2617       if ( rank_prec_comm == master_proc ) {
2618 
2619         PetscInt heuristic_for_metis=3;
2620 
2621         ncon=1;
2622         faces_nvtxs=n_subdomains;
2623         /* partition graoh induced by face connectivity */
2624         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2625         ierr = METIS_SetDefaultOptions(options);
2626         /* we need a contiguous partition of the coarse mesh */
2627         options[METIS_OPTION_CONTIG]=1;
2628         options[METIS_OPTION_NITER]=30;
2629         if (pcbddc->coarsening_ratio > 1) {
2630           if (n_subdomains>n_parts*heuristic_for_metis) {
2631             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2632             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2633             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2634             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2635           } else {
2636             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2637             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2638           }
2639         } else {
2640           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2641         }
2642         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2643         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2644         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2645 
2646         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2647         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2648         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2649         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2650       }
2651 
2652       /* Create new communicator for coarse problem splitting the old one */
2653       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2654         coarse_color=0;              /* for communicator splitting */
2655         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2656       }
2657       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2658          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2659       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2660 
2661       if ( coarse_color == 0 ) {
2662         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2663         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2664       } else {
2665         rank_coarse_comm = MPI_PROC_NULL;
2666       }
2667 
2668       /* master proc take care of arranging and distributing coarse information */
2669       if (rank_coarse_comm == master_proc) {
2670         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2671         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2672         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2673         /* some initializations */
2674         displacements_recv[0]=0;
2675         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2676         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2677         for (j=0;j<size_coarse_comm;j++) {
2678           for (i=0;i<size_prec_comm;i++) {
2679           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2680           }
2681         }
2682         /* displacements needed for scatterv of total_ranks_recv */
2683       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2684 
2685         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2686         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2687         for (j=0;j<size_coarse_comm;j++) {
2688           for (i=0;i<size_prec_comm;i++) {
2689             if (coarse_subdivision[i]==j) {
2690               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2691               total_count_recv[j]+=1;
2692             }
2693           }
2694         }
2695         /*for (j=0;j<size_coarse_comm;j++) {
2696           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2697           for (i=0;i<total_count_recv[j];i++) {
2698             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2699           }
2700           printf("\n");
2701         }*/
2702 
2703         /* identify new decomposition in terms of ranks in the old communicator */
2704         for (i=0;i<n_subdomains;i++) {
2705           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2706         }
2707         /*printf("coarse_subdivision in old end new ranks\n");
2708         for (i=0;i<size_prec_comm;i++)
2709           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2710             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2711           } else {
2712             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2713           }
2714         printf("\n");*/
2715       }
2716 
2717       /* Scatter new decomposition for send details */
2718       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2719       /* Scatter receiving details to members of coarse decomposition */
2720       if ( coarse_color == 0) {
2721         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2722         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2723         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2724       }
2725 
2726       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2727       if (coarse_color == 0) {
2728         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2729         for (i=0;i<count_recv;i++)
2730           printf("%d ",ranks_recv[i]);
2731         printf("\n");
2732       }*/
2733 
2734       if (rank_prec_comm == master_proc) {
2735         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2736         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2737         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2738         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2739       }
2740 #endif
2741       break;
2742     }
2743 
2744     case(REPLICATED_BDDC):
2745 
2746       pcbddc->coarse_communications_type = GATHERS_BDDC;
2747       coarse_mat_type = MATSEQAIJ;
2748       coarse_pc_type  = PCLU;
2749       coarse_ksp_type  = KSPPREONLY;
2750       coarse_comm = PETSC_COMM_SELF;
2751       active_rank = rank_prec_comm;
2752       break;
2753 
2754     case(PARALLEL_BDDC):
2755 
2756       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2757       coarse_mat_type = MATAIJ;
2758       coarse_pc_type  = PCREDUNDANT;
2759       coarse_ksp_type  = KSPPREONLY;
2760       coarse_comm = prec_comm;
2761       active_rank = rank_prec_comm;
2762       break;
2763 
2764     case(SEQUENTIAL_BDDC):
2765       pcbddc->coarse_communications_type = GATHERS_BDDC;
2766       coarse_mat_type = MATAIJ;
2767       coarse_pc_type = PCLU;
2768       coarse_ksp_type  = KSPPREONLY;
2769       coarse_comm = PETSC_COMM_SELF;
2770       active_rank = master_proc;
2771       break;
2772   }
2773 
2774   switch(pcbddc->coarse_communications_type){
2775 
2776     case(SCATTERS_BDDC):
2777       {
2778         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2779 
2780           IS coarse_IS;
2781 
2782           if(pcbddc->coarsening_ratio == 1) {
2783             ins_local_primal_size = pcbddc->local_primal_size;
2784             ins_local_primal_indices = pcbddc->local_primal_indices;
2785             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2786             /* nonzeros */
2787             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2788             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2789             for (i=0;i<ins_local_primal_size;i++) {
2790               dnz[i] = ins_local_primal_size;
2791             }
2792           } else {
2793             PetscMPIInt send_size;
2794             PetscMPIInt *send_buffer;
2795             PetscInt    *aux_ins_indices;
2796             PetscInt    ii,jj;
2797             MPI_Request *requests;
2798 
2799             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2800             /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
2801             ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
2802             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2803             pcbddc->replicated_primal_size = count_recv;
2804             j = 0;
2805             for (i=0;i<count_recv;i++) {
2806               pcbddc->local_primal_displacements[i] = j;
2807               j += pcbddc->local_primal_sizes[ranks_recv[i]];
2808             }
2809             pcbddc->local_primal_displacements[count_recv] = j;
2810             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2811             /* allocate auxiliary space */
2812             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2813             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2814             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2815             /* allocate stuffs for message massing */
2816             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2817             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2818             /* send indices to be inserted */
2819             for (i=0;i<count_recv;i++) {
2820               send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
2821               ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2822             }
2823             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2824               send_size = pcbddc->local_primal_size;
2825               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2826               for (i=0;i<send_size;i++) {
2827                 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2828               }
2829               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2830             }
2831             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2832             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2833               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2834             }
2835             j = 0;
2836             for (i=0;i<count_recv;i++) {
2837               ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
2838               localsizes2[i] = ii*ii;
2839               localdispl2[i] = j;
2840               j += localsizes2[i];
2841               jj = pcbddc->local_primal_displacements[i];
2842               /* it counts the coarse subdomains sharing the coarse node */
2843               for (k=0;k<ii;k++) {
2844                 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
2845               }
2846             }
2847             /* temp_coarse_mat_vals used to store matrix values to be received */
2848             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2849             /* evaluate how many values I will insert in coarse mat */
2850             ins_local_primal_size = 0;
2851             for (i=0;i<coarse_size;i++) {
2852               if (aux_ins_indices[i]) {
2853                 ins_local_primal_size++;
2854               }
2855             }
2856             /* evaluate indices I will insert in coarse mat */
2857             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2858             j = 0;
2859             for(i=0;i<coarse_size;i++) {
2860               if(aux_ins_indices[i]) {
2861                 ins_local_primal_indices[j] = i;
2862                 j++;
2863               }
2864             }
2865             /* processes partecipating in coarse problem receive matrix data from their friends */
2866             for (i=0;i<count_recv;i++) {
2867               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2868             }
2869             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2870               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2871               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2872             }
2873             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2874             /* nonzeros */
2875             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2876             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2877             /* use aux_ins_indices to realize a global to local mapping */
2878             j=0;
2879             for(i=0;i<coarse_size;i++){
2880               if(aux_ins_indices[i]==0){
2881                 aux_ins_indices[i]=-1;
2882               } else {
2883                 aux_ins_indices[i]=j;
2884                 j++;
2885               }
2886             }
2887             for (i=0;i<count_recv;i++) {
2888               j = pcbddc->local_primal_sizes[ranks_recv[i]];
2889               for (k=0;k<j;k++) {
2890                 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
2891               }
2892             }
2893             /* check */
2894             for (i=0;i<ins_local_primal_size;i++) {
2895               if (dnz[i] > ins_local_primal_size) {
2896                 dnz[i] = ins_local_primal_size;
2897               }
2898             }
2899             ierr = PetscFree(requests);CHKERRQ(ierr);
2900             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2901             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2902           }
2903           /* create local to global mapping needed by coarse MATIS */
2904           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2905           coarse_comm = prec_comm;
2906           active_rank = rank_prec_comm;
2907           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2908           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2909           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2910         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2911           /* arrays for values insertion */
2912           ins_local_primal_size = pcbddc->local_primal_size;
2913           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2914           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2915           for (j=0;j<ins_local_primal_size;j++){
2916             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2917             for (i=0;i<ins_local_primal_size;i++) {
2918               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2919             }
2920           }
2921         }
2922         break;
2923 
2924     }
2925 
2926     case(GATHERS_BDDC):
2927       {
2928 
2929         PetscMPIInt mysize,mysize2;
2930         PetscMPIInt *send_buffer;
2931 
2932         if (rank_prec_comm==active_rank) {
2933           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2934           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
2935           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2936           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2937           /* arrays for values insertion */
2938       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2939           localdispl2[0]=0;
2940       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2941           j=0;
2942       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2943           ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2944         }
2945 
2946         mysize=pcbddc->local_primal_size;
2947         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2948         ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2949     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2950 
2951         if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2952           ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2953           ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
2954         } else {
2955           ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
2956           ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
2957         }
2958         ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2959         break;
2960       }/* switch on coarse problem and communications associated with finished */
2961   }
2962 
2963   /* Now create and fill up coarse matrix */
2964   if ( rank_prec_comm == active_rank ) {
2965 
2966     Mat matis_coarse_local_mat;
2967 
2968     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2969       ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
2970       ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2971       ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2972       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2973       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2974       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2975       ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2976       ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2977     } else {
2978       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
2979       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2980       ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2981       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2982       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2983       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2984       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2985       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2986     }
2987     /* preallocation */
2988     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2989 
2990       PetscInt lrows,lcols,bs;
2991 
2992       ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2993       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2994       ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr);
2995 
2996       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2997 
2998         Vec         vec_dnz,vec_onz;
2999         PetscScalar *my_dnz,*my_onz,*array;
3000         PetscInt    *mat_ranges,*row_ownership;
3001         PetscInt    coarse_index_row,coarse_index_col,owner;
3002 
3003         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
3004         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
3005         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
3006         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
3007         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
3008 
3009         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
3010         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
3011         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3012         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3013 
3014         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
3015         ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
3016         for (i=0;i<size_prec_comm;i++) {
3017           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
3018             row_ownership[j]=i;
3019           }
3020         }
3021 
3022         for (i=0;i<pcbddc->local_primal_size;i++) {
3023           coarse_index_row = pcbddc->local_primal_indices[i];
3024           owner = row_ownership[coarse_index_row];
3025           for (j=i;j<pcbddc->local_primal_size;j++) {
3026             owner = row_ownership[coarse_index_row];
3027             coarse_index_col = pcbddc->local_primal_indices[j];
3028             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3029               my_dnz[i] += 1.0;
3030             } else {
3031               my_onz[i] += 1.0;
3032             }
3033             if (i != j) {
3034               owner = row_ownership[coarse_index_col];
3035               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3036                 my_dnz[j] += 1.0;
3037               } else {
3038                 my_onz[j] += 1.0;
3039               }
3040             }
3041           }
3042         }
3043         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3044         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3045         if (pcbddc->local_primal_size) {
3046           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3047           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3048         }
3049         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3050         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3051         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3052         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3053         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3054         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3055         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3056 
3057         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3058         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3059         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3060 
3061         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3062         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3063         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3064         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3065         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3066         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3067       } else {
3068         for (k=0;k<size_prec_comm;k++){
3069           offset=pcbddc->local_primal_displacements[k];
3070           offset2=localdispl2[k];
3071           ins_local_primal_size = pcbddc->local_primal_sizes[k];
3072           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3073           for (j=0;j<ins_local_primal_size;j++){
3074             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3075           }
3076           for (j=0;j<ins_local_primal_size;j++) {
3077             ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr);
3078           }
3079           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3080         }
3081       }
3082 
3083       /* check */
3084       for (i=0;i<lrows;i++) {
3085         if (dnz[i]>lcols) dnz[i]=lcols;
3086         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
3087       }
3088       ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr);
3089       ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3090       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3091     } else {
3092       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3093       ierr = PetscFree(dnz);CHKERRQ(ierr);
3094     }
3095     /* insert values */
3096     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3097       ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3098     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3099       if (pcbddc->coarsening_ratio == 1) {
3100         ins_coarse_mat_vals = coarse_submat_vals;
3101         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3102       } else {
3103         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3104         for (k=0;k<pcbddc->replicated_primal_size;k++) {
3105           offset = pcbddc->local_primal_displacements[k];
3106           offset2 = localdispl2[k];
3107           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
3108           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3109           for (j=0;j<ins_local_primal_size;j++){
3110             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3111           }
3112           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3113           ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3114           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3115         }
3116       }
3117       ins_local_primal_indices = 0;
3118       ins_coarse_mat_vals = 0;
3119     } else {
3120       for (k=0;k<size_prec_comm;k++){
3121         offset=pcbddc->local_primal_displacements[k];
3122         offset2=localdispl2[k];
3123         ins_local_primal_size = pcbddc->local_primal_sizes[k];
3124         ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3125         for (j=0;j<ins_local_primal_size;j++){
3126           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3127         }
3128         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3129         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3130         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3131       }
3132       ins_local_primal_indices = 0;
3133       ins_coarse_mat_vals = 0;
3134     }
3135     ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3136     ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3137     /* symmetry of coarse matrix */
3138     if (issym) {
3139       ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3140     }
3141     ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3142   }
3143 
3144   /* create loc to glob scatters if needed */
3145   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3146      IS local_IS,global_IS;
3147      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3148      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3149      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3150      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3151      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3152   }
3153 
3154   /* free memory no longer needed */
3155   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3156   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3157   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3158   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3159   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3160   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3161 
3162   /* Compute coarse null space */
3163   CoarseNullSpace = 0;
3164   if (pcbddc->NullSpace) {
3165     ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr);
3166   }
3167 
3168   /* KSP for coarse problem */
3169   if (rank_prec_comm == active_rank) {
3170     PetscBool isbddc=PETSC_FALSE;
3171 
3172     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3173     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3174     ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3175     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3176     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3177     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3178     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3179     /* Allow user's customization */
3180     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3181     /* Set Up PC for coarse problem BDDC */
3182     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3183       i = pcbddc->current_level+1;
3184       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3185       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3186       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3187       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3188       if (CoarseNullSpace) {
3189         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3190       }
3191       if (dbg_flag) {
3192         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3193         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3194       }
3195     } else {
3196       if (CoarseNullSpace) {
3197         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3198       }
3199     }
3200     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3201     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3202 
3203     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3204     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3205     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3206     if (j == 1) {
3207       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3208       if (isbddc) {
3209         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3210       }
3211     }
3212   }
3213   /* Check coarse problem if requested */
3214   if ( dbg_flag && rank_prec_comm == active_rank ) {
3215     KSP check_ksp;
3216     PC  check_pc;
3217     Vec check_vec;
3218     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3219     KSPType check_ksp_type;
3220 
3221     /* Create ksp object suitable for extreme eigenvalues' estimation */
3222     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3223     ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3224     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3225     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3226       if (issym) check_ksp_type = KSPCG;
3227       else check_ksp_type = KSPGMRES;
3228       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3229     } else {
3230       check_ksp_type = KSPPREONLY;
3231     }
3232     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3233     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3234     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3235     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3236     /* create random vec */
3237     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3238     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3239     if (CoarseNullSpace) {
3240       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3241     }
3242     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3243     /* solve coarse problem */
3244     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3245     if (CoarseNullSpace) {
3246       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3247     }
3248     /* check coarse problem residual error */
3249     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3250     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3251     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3252     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3253     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3254     /* get eigenvalue estimation if inexact */
3255     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3256       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3257       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3258       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3259       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3260     }
3261     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3262     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3263     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3264   }
3265   if (dbg_flag) {
3266     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3267   }
3268   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3269 
3270   PetscFunctionReturn(0);
3271 }
3272 
3273