xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision c8587f3400bb52e8a0bc75c0376dec77962410ff)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
10   PetscErrorCode    ierr;
11 
12   PetscFunctionBegin;
13   /* Compute matrix after change of basis and extract local submatrices */
14   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
15 
16   /* Allocate needed vectors */
17   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
18 
19   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
20   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
21 
22   /* Setup local solvers ksp_D and ksp_R */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /* setup local correction and local part of coarse basis */
31   ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "PCBDDCSetLevel"
37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
38 {
39   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
40 
41   PetscFunctionBegin;
42   pcbddc->current_level=level;
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "PCBDDCResetCustomization"
48 PetscErrorCode PCBDDCResetCustomization(PC pc)
49 {
50   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
51   PetscInt       i;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
60   for (i=0;i<pcbddc->n_ISForDofs;i++) {
61     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
62   }
63   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCCreateWorkVectors"
118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
128   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
129   local_primal_size = n_constraints+n_vertices;
130   n_R = pcis->n-n_vertices;
131   /* local work vectors */
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
134   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
135   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
136   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
141   if (n_constraints) {
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
145   }
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc)
152 {
153   PetscErrorCode         ierr;
154   /* pointers to pcis and pcbddc */
155   PC_IS*                 pcis = (PC_IS*)pc->data;
156   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
157   /* submatrices of local problem */
158   Mat                    A_RV,A_VR,A_VV;
159   /* working matrices */
160   Mat                    M1,M2,M3,C_CR;
161   /* working vectors */
162   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
163   /* additional working stuff */
164   IS                     is_aux;
165   ISLocalToGlobalMapping BtoNmap;
166   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
167   const PetscScalar      *array,*row_cmat_values;
168   const PetscInt         *row_cmat_indices,*idx_R_local;
169   PetscInt               *vertices,*idx_V_B,*auxindices;
170   PetscInt               n_vertices,n_constraints,size_of_constraint;
171   PetscInt               i,j,n_R,n_D,n_B;
172   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
173   /* Vector and matrix types */
174   VecType                impVecType;
175   MatType                impMatType;
176   /* some shortcuts to scalars */
177   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
178   /* for debugging purposes */
179   PetscReal              *coarsefunctions_errors,*constraints_errors;
180 
181   PetscFunctionBegin;
182   /* get number of vertices and their local indices */
183   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
184   n_constraints = pcbddc->local_primal_size-n_vertices;
185   /* Set Non-overlapping dimensions */
186   n_B = pcis->n_B; n_D = pcis->n - n_B;
187   n_R = pcis->n-n_vertices;
188 
189   /* Set types for local objects needed by BDDC precondtioner */
190   impMatType = MATSEQDENSE;
191   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
192 
193   /* Allocating some extra storage just to be safe */
194   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
195   for (i=0;i<pcis->n;i++) auxindices[i]=i;
196 
197   /* vertices in boundary numbering */
198   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
199   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
200   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
201   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
202   if (i != n_vertices) {
203     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
204   }
205 
206   /* some work vectors on vertices and/or constraints */
207   if (n_vertices) {
208     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
209     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
210     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
211     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
212   }
213   if (n_constraints) {
214     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
215     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
216   }
217 
218   /* Precompute stuffs needed for preprocessing and application of BDDC*/
219   if (n_constraints) {
220     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
221     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
222     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
223     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
224 
225     /* Extract constraints on R nodes: C_{CR}  */
226     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
227     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
228     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
229 
230     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
231     for (i=0;i<n_constraints;i++) {
232       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
233       /* Get row of constraint matrix in R numbering */
234       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
235       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
237       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
238       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
239       /* Solve for row of constraint matrix in R numbering */
240       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
241       /* Set values in local_auxmat2 */
242       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
243       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
244       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
245     }
246     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
249 
250     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
251     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
252     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
253     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
254     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
255     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
256     ierr = MatSetUp(M1);CHKERRQ(ierr);
257     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
258     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
259     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
260     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
261     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
262     ierr = MatDestroy(&M2);CHKERRQ(ierr);
263     ierr = MatDestroy(&M3);CHKERRQ(ierr);
264     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
265     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
266   }
267 
268   /* Get submatrices from subdomain matrix */
269   if (n_vertices) {
270     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
271     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
272     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
273     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
274     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
275   }
276 
277   /* Matrix of coarse basis functions (local) */
278   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
279   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
280   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
281   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
282   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
283     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
284     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
285     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
286     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
287   }
288 
289   if (pcbddc->dbg_flag) {
290     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
291     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
292     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
293   }
294   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
295   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
296 
297   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
298 
299   /* vertices */
300   for (i=0;i<n_vertices;i++) {
301     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
302     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
303     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
304     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
305     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
306     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
307     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
308     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
309     if (n_constraints) {
310       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
311       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
313     }
314     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
315     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
316 
317     /* Set values in coarse basis function and subdomain part of coarse_mat */
318     /* coarse basis functions */
319     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
320     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
322     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
323     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
324     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
325     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
326     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
327       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
328       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
329       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
330       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
331       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
332     }
333     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
334     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
335     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
336     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
337     if (n_constraints) {
338       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
339       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
340       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
341     }
342 
343     /* check */
344     if (pcbddc->dbg_flag) {
345       /* assemble subdomain vector on local nodes */
346       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
350       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
351       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
352       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
353       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
354       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
355       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
356       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
357       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
358       if (n_constraints) {
359         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
360         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
361         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
362       }
363       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
364       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
365       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
366       /* check saddle point solution */
367       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
368       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
369       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
370       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
371       /* shift by the identity matrix */
372       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
373       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
374       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
376     }
377   }
378 
379   /* constraints */
380   for (i=0;i<n_constraints;i++) {
381     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
382     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
383     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
384     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
385     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
386     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
387     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
388     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     if (n_vertices) {
390       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     }
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     j = i+n_vertices; /* don't touch this! */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     if (n_vertices) {
410       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     }
414     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417 
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       if (n_vertices) {
429         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       }
433       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
449     }
450   }
451   /* call assembling routines for local coarse basis */
452   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
455     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   }
458 
459   /* compute other basis functions for non-symmetric problems */
460   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
461   if (!setsym || (setsym && !issym)) {
462     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
463     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
464     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
465     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
466     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
467       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
468       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
469       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
470       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
471     }
472     for (i=0;i<pcbddc->local_primal_size;i++) {
473       if (n_constraints) {
474         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
475         for (j=0;j<n_constraints;j++) {
476           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
477         }
478         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
479         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
480       }
481       if (i<n_vertices) {
482         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
483         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
484         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
485         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
486         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
487         if (n_constraints) {
488           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
489         }
490       } else {
491         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
492       }
493       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
494       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
495       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
498       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
500       if (i<n_vertices) {
501         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
502       }
503       if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
504         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
507         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
509       }
510 
511       if (pcbddc->dbg_flag) {
512         /* assemble subdomain vector on nodes */
513         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
514         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
515         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
516         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
517         if (i<n_vertices) {
518           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
519         }
520         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
521         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
522         /* assemble subdomain vector of lagrange multipliers */
523         for (j=0;j<pcbddc->local_primal_size;j++) {
524           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
525         }
526         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
527         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
528         /* check saddle point solution */
529         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
530         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
531         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
532         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
533         /* shift by the identity matrix */
534         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
535         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
536         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
537         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
538       }
539     }
540     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
541     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
542     if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
543       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
544       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     }
546   }
547   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
548   /* Checking coarse_sub_mat and coarse basis functios */
549   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
550   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
551   if (pcbddc->dbg_flag) {
552     Mat         coarse_sub_mat;
553     Mat         AUXMAT,TM1,TM2,TM3,TM4;
554     Mat         coarse_phi_D,coarse_phi_B;
555     Mat         coarse_psi_D,coarse_psi_B;
556     Mat         A_II,A_BB,A_IB,A_BI;
557     MatType     checkmattype=MATSEQAIJ;
558     PetscReal   real_value;
559 
560     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
561     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
562     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
563     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
564     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
565     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
566     if (pcbddc->coarse_psi_B) {
567       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
568       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
569     }
570     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
571 
572     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
573     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
574     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
575     if (pcbddc->coarse_psi_B) {
576       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
577       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
578       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
579       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
580       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
581       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
582       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
583       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
584       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
585       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
586       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
587       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
588     } else {
589       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
590       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
591       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
592       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
593       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
594       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597     }
598     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
599     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
600     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
601     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
602     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
604     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
605     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
606     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
607     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
608     for (i=0;i<pcbddc->local_primal_size;i++) {
609       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
610     }
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
614     }
615     if (pcbddc->coarse_psi_B) {
616       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
617       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
618         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
619       }
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
623       }
624     }
625     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
626     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
627     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
628     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
629     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
630     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
631     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
632     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
633     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
634     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
635     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
636     if (pcbddc->coarse_psi_B) {
637       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
638       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
639     }
640     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
641     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
642     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
643     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
644   }
645   /* free memory */
646   if (n_vertices) {
647     ierr = PetscFree(vertices);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
649     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
652     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
653   }
654   if (n_constraints) {
655     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
656     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
657     ierr = MatDestroy(&M1);CHKERRQ(ierr);
658     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
659   }
660   ierr = PetscFree(auxindices);CHKERRQ(ierr);
661   /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */
662   ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
663   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
670 {
671   PC_IS*            pcis = (PC_IS*)(pc->data);
672   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
673   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
674   /* manage repeated solves */
675   MatReuse          reuse;
676   MatStructure      matstruct;
677   PetscErrorCode    ierr;
678 
679   PetscFunctionBegin;
680   /* get mat flags */
681   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
682   reuse = MAT_INITIAL_MATRIX;
683   if (pc->setupcalled) {
684     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
685     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
686     if (matstruct == SAME_NONZERO_PATTERN) {
687       reuse = MAT_REUSE_MATRIX;
688     } else {
689       reuse = MAT_INITIAL_MATRIX;
690     }
691   }
692   if (reuse == MAT_INITIAL_MATRIX) {
693     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
694     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
695     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
696     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
698   }
699 
700   /* transform local matrices if needed */
701   if (pcbddc->use_change_of_basis) {
702     Mat         change_mat_all;
703     PetscScalar *row_cmat_values;
704     PetscInt    *row_cmat_indices;
705     PetscInt    *nnz,*is_indices,*temp_indices;
706     PetscInt    i,j,k,n_D,n_B;
707 
708     /* Get Non-overlapping dimensions */
709     n_B = pcis->n_B;
710     n_D = pcis->n-n_B;
711 
712     /* compute nonzero structure of change of basis on all local nodes */
713     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
714     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
715     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
716     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     k=1;
719     for (i=0;i<n_B;i++) {
720       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
721       nnz[is_indices[i]]=j;
722       if (k < j) k = j;
723       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724     }
725     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     /* assemble change of basis matrix on the whole set of local dofs */
727     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
728     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
729     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
730     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
731     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     for (i=0;i<n_D;i++) {
734       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
735     }
736     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
738     for (i=0;i<n_B;i++) {
739       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
740       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
741       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
742       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743     }
744     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
746     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
747     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
748     if (i==1) {
749       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
750     } else {
751       Mat work_mat;
752       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
753       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
754       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
755     }
756     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
757     ierr = PetscFree(nnz);CHKERRQ(ierr);
758     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
759   } else {
760     /* without change of basis, the local matrix is unchanged */
761     if (!pcbddc->local_mat) {
762       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
763       pcbddc->local_mat = matis->A;
764     }
765   }
766 
767   /* get submatrices */
768   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
769   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
770   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
778 {
779   PC_IS*         pcis = (PC_IS*)(pc->data);
780   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
781   IS             is_aux1,is_aux2;
782   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
783   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
784   PetscBool      *array_bool;
785   PetscErrorCode ierr;
786 
787   PetscFunctionBegin;
788   /* Set Non-overlapping dimensions */
789   n_B = pcis->n_B; n_D = pcis->n - n_B;
790   /* get vertex indices from constraint matrix */
791   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
792   /* Set number of constraints */
793   n_constraints = pcbddc->local_primal_size-n_vertices;
794   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
795   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&array_bool);CHKERRQ(ierr);
796   for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_TRUE;
797   for (i=0;i<n_vertices;i++) array_bool[vertices[i]] = PETSC_FALSE;
798   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
799   for (i=0, n_R=0; i<pcis->n; i++) {
800     if (array_bool[i]) {
801       idx_R_local[n_R] = i;
802       n_R++;
803     }
804   }
805   ierr = PetscFree(vertices);CHKERRQ(ierr);
806   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
807 
808   /* print some info if requested */
809   if (pcbddc->dbg_flag) {
810     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
811     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
812     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
813     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
814     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
815     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817   }
818 
819   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
820   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
821   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
822   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
823   for (i=0; i<n_D; i++) array_bool[is_indices[i]] = PETSC_FALSE;
824   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825   for (i=0, j=0; i<n_R; i++) {
826     if (array_bool[idx_R_local[i]]) {
827       aux_array1[j] = i;
828       j++;
829     }
830   }
831   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
832   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   for (i=0, j=0; i<n_B; i++) {
834     if (array_bool[is_indices[i]]) {
835       aux_array2[j] = i; j++;
836     }
837   }
838   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
840   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
841   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
842   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
843 
844   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
845     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
846     for (i=0, j=0; i<n_R; i++) {
847       if (!array_bool[idx_R_local[i]]) {
848         aux_array1[j] = i;
849         j++;
850       }
851     }
852     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
853     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
854     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
855   }
856   ierr = PetscFree(array_bool);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
862 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
863 {
864   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
865 
866   PetscFunctionBegin;
867   pcbddc->use_exact_dirichlet=use;
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
873 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
874 {
875   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
876   PC_IS          *pcis = (PC_IS*)pc->data;
877   PC             pc_temp;
878   Mat            A_RR;
879   Vec            vec1,vec2,vec3;
880   MatStructure   matstruct;
881   PetscScalar    m_one = -1.0;
882   PetscReal      value;
883   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   /* Creating PC contexts for local Dirichlet and Neumann problems */
888   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
889 
890   /* DIRICHLET PROBLEM */
891   /* Matrix for Dirichlet problem is pcis->A_II */
892   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
893   if (!pcbddc->ksp_D) { /* create object if not yet build */
894     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
895     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
896     /* default */
897     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
898     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
899     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
900     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
901     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
902   }
903   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
904   /* Allow user's customization */
905   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
906   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
907   if (!n_D) {
908     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
909     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
910   }
911   /* Set Up KSP for Dirichlet problem of BDDC */
912   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
913   /* set ksp_D into pcis data */
914   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
915   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
916   pcis->ksp_D = pcbddc->ksp_D;
917 
918   /* NEUMANN PROBLEM */
919   /* Matrix for Neumann problem is A_RR -> we need to create it */
920   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
921   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
922   if (!pcbddc->ksp_R) { /* create object if not yet build */
923     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
924     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
925     /* default */
926     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
927     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
928     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
929     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
930     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
931   }
932   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
933   /* Allow user's customization */
934   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
935   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
936   if (!n_R) {
937     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
938     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
939   }
940   /* Set Up KSP for Neumann problem of BDDC */
941   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
942 
943   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
944 
945   /* Dirichlet */
946   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
947   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
948   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
949   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
950   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
951   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
952   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
953   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
954   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
955   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
956   /* need to be adapted? */
957   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
958   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
959   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
960   /* print info */
961   if (pcbddc->dbg_flag) {
962     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
963     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967   }
968   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
969     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
970   }
971 
972   /* Neumann */
973   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
974   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
975   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
976   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
977   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
978   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
979   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
980   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
981   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
982   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
983   /* need to be adapted? */
984   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
985   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
986   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
987   /* print info */
988   if (pcbddc->dbg_flag) {
989     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
990     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
991   }
992   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
993     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
994   }
995 
996   /* free Neumann problem's matrix */
997   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1003 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1004 {
1005   PetscErrorCode ierr;
1006   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1007 
1008   PetscFunctionBegin;
1009   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1010   if (pcbddc->local_auxmat1) {
1011     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1012     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1019 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1020 {
1021   PetscErrorCode ierr;
1022   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1023   PC_IS*            pcis = (PC_IS*)  (pc->data);
1024   const PetscScalar zero = 0.0;
1025 
1026   PetscFunctionBegin;
1027   /* Application of PHI^T (or PSI^T)  */
1028   if (pcbddc->coarse_psi_B) {
1029     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1030     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1031   } else {
1032     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1033     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1034   }
1035   /* Scatter data of coarse_rhs */
1036   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1037   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038 
1039   /* Local solution on R nodes */
1040   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1041   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1043   if (pcbddc->inexact_prec_type) {
1044     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046   }
1047   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1048   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1049   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   if (pcbddc->inexact_prec_type) {
1052     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054   }
1055 
1056   /* Coarse solution */
1057   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1059     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1060   }
1061   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1062   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1063 
1064   /* Sum contributions from two levels */
1065   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1066   if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1072 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1073 {
1074   PetscErrorCode ierr;
1075   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1076 
1077   PetscFunctionBegin;
1078   switch (pcbddc->coarse_communications_type) {
1079     case SCATTERS_BDDC:
1080       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1081       break;
1082     case GATHERS_BDDC:
1083       break;
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1090 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1091 {
1092   PetscErrorCode ierr;
1093   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1094   PetscScalar*   array_to;
1095   PetscScalar*   array_from;
1096   MPI_Comm       comm;
1097   PetscInt       i;
1098 
1099   PetscFunctionBegin;
1100   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1101   switch (pcbddc->coarse_communications_type) {
1102     case SCATTERS_BDDC:
1103       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1104       break;
1105     case GATHERS_BDDC:
1106       if (vec_from) {
1107         ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr);
1108       }
1109       if (vec_to) {
1110         ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr);
1111       }
1112       switch(pcbddc->coarse_problem_type){
1113         case SEQUENTIAL_BDDC:
1114           if (smode == SCATTER_FORWARD) {
1115             ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1116             if (vec_to) {
1117               if (imode == ADD_VALUES) {
1118                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1119                   array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1120                 }
1121               } else {
1122                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1123                   array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1124                 }
1125               }
1126             }
1127           } else {
1128             if (vec_from) {
1129               if (imode == ADD_VALUES) {
1130                 MPI_Comm vec_from_comm;
1131                 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr);
1132                 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type);
1133               }
1134               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1135                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
1136               }
1137             }
1138             ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1139           }
1140           break;
1141         case REPLICATED_BDDC:
1142           if (smode == SCATTER_FORWARD) {
1143             ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
1144             if (imode == ADD_VALUES) {
1145               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1146                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1147               }
1148             } else {
1149               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1150                 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1151               }
1152             }
1153           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
1154             if (imode == ADD_VALUES) {
1155               for (i=0;i<pcbddc->local_primal_size;i++) {
1156                 array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
1157               }
1158             } else {
1159               for (i=0;i<pcbddc->local_primal_size;i++) {
1160                 array_to[i]=array_from[pcbddc->local_primal_indices[i]];
1161               }
1162             }
1163           }
1164           break;
1165         case MULTILEVEL_BDDC:
1166           break;
1167         case PARALLEL_BDDC:
1168           break;
1169       }
1170       if (vec_from) {
1171         ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr);
1172       }
1173       if (vec_to) {
1174         ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr);
1175       }
1176       break;
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 /* uncomment for testing purposes */
1182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1186 {
1187   PetscErrorCode    ierr;
1188   PC_IS*            pcis = (PC_IS*)(pc->data);
1189   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1190   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1191   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1192   MatType           impMatType=MATSEQAIJ;
1193   /* one and zero */
1194   PetscScalar       one=1.0,zero=0.0;
1195   /* space to store constraints and their local indices */
1196   PetscScalar       *temp_quadrature_constraint;
1197   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1198   /* iterators */
1199   PetscInt          i,j,k,total_counts,temp_start_ptr;
1200   /* stuff to store connected components stored in pcbddc->mat_graph */
1201   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1202   PetscInt          n_ISForFaces,n_ISForEdges;
1203   PetscBool         get_faces,get_edges,get_vertices;
1204   /* near null space stuff */
1205   MatNullSpace      nearnullsp;
1206   const Vec         *nearnullvecs;
1207   Vec               *localnearnullsp;
1208   PetscBool         nnsp_has_cnst;
1209   PetscInt          nnsp_size;
1210   PetscScalar       *array;
1211   /* BLAS integers */
1212   PetscBLASInt      lwork,lierr;
1213   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1214   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1215   /* LAPACK working arrays for SVD or POD */
1216   PetscBool         skip_lapack;
1217   PetscScalar       *work;
1218   PetscReal         *singular_vals;
1219 #if defined(PETSC_USE_COMPLEX)
1220   PetscReal         *rwork;
1221 #endif
1222 #if defined(PETSC_MISSING_LAPACK_GESVD)
1223   PetscBLASInt      Blas_one_2=1;
1224   PetscScalar       *temp_basis,*correlation_mat;
1225 #else
1226   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1227   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1228 #endif
1229   /* change of basis */
1230   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1231   PetscBool         boolforchange,*change_basis,*touched;
1232   /* auxiliary stuff */
1233   PetscInt          *nnz,*is_indices,*local_to_B;
1234   /* some quantities */
1235   PetscInt          n_vertices,total_primal_vertices;
1236   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1237 
1238 
1239   PetscFunctionBegin;
1240   /* Get index sets for faces, edges and vertices from graph */
1241   get_faces = PETSC_TRUE;
1242   get_edges = PETSC_TRUE;
1243   get_vertices = PETSC_TRUE;
1244   if (pcbddc->vertices_flag) {
1245     get_faces = PETSC_FALSE;
1246     get_edges = PETSC_FALSE;
1247   }
1248   if (pcbddc->constraints_flag) {
1249     get_vertices = PETSC_FALSE;
1250   }
1251   if (pcbddc->faces_flag) {
1252     get_edges = PETSC_FALSE;
1253   }
1254   if (pcbddc->edges_flag) {
1255     get_faces = PETSC_FALSE;
1256   }
1257   /* default */
1258   if (!get_faces && !get_edges && !get_vertices) {
1259     get_vertices = PETSC_TRUE;
1260   }
1261   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1262   /* print some info */
1263   if (pcbddc->dbg_flag) {
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1265     i = 0;
1266     if (ISForVertices) {
1267       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1268     }
1269     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1270     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1272     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1273   }
1274   /* check if near null space is attached to global mat */
1275   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1276   if (nearnullsp) {
1277     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1278   } else { /* if near null space is not provided BDDC uses constants by default */
1279     nnsp_size = 0;
1280     nnsp_has_cnst = PETSC_TRUE;
1281   }
1282   /* get max number of constraints on a single cc */
1283   max_constraints = nnsp_size;
1284   if (nnsp_has_cnst) max_constraints++;
1285 
1286   /*
1287        Evaluate maximum storage size needed by the procedure
1288        - temp_indices will contain start index of each constraint stored as follows
1289        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1290        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1291        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1292                                                                                                                                                          */
1293   total_counts = n_ISForFaces+n_ISForEdges;
1294   total_counts *= max_constraints;
1295   n_vertices = 0;
1296   if (ISForVertices) {
1297     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1298   }
1299   total_counts += n_vertices;
1300   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1301   ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr);
1302   total_counts = 0;
1303   max_size_of_constraint = 0;
1304   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1305     if (i<n_ISForEdges) {
1306       used_IS = &ISForEdges[i];
1307     } else {
1308       used_IS = &ISForFaces[i-n_ISForEdges];
1309     }
1310     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1311     total_counts += j;
1312     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1313   }
1314   total_counts *= max_constraints;
1315   total_counts += n_vertices;
1316   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1317   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1318   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1319   /* local to boundary numbering */
1320   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1321   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1322   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1323   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1324   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   /* get local part of global near null space vectors */
1326   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1327   for (k=0;k<nnsp_size;k++) {
1328     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1329     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1331   }
1332 
1333   /* whether or not to skip lapack calls */
1334   skip_lapack = PETSC_TRUE;
1335   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1336 
1337   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1338   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1339     PetscScalar temp_work;
1340 #if defined(PETSC_MISSING_LAPACK_GESVD)
1341     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1342     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1343     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1344     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1345 #if defined(PETSC_USE_COMPLEX)
1346     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1347 #endif
1348     /* now we evaluate the optimal workspace using query with lwork=-1 */
1349     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1350     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1351     lwork = -1;
1352     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1353 #if !defined(PETSC_USE_COMPLEX)
1354     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1355 #else
1356     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1357 #endif
1358     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1359     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1360 #else /* on missing GESVD */
1361     /* SVD */
1362     PetscInt max_n,min_n;
1363     max_n = max_size_of_constraint;
1364     min_n = max_constraints;
1365     if (max_size_of_constraint < max_constraints) {
1366       min_n = max_size_of_constraint;
1367       max_n = max_constraints;
1368     }
1369     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1370 #if defined(PETSC_USE_COMPLEX)
1371     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1372 #endif
1373     /* now we evaluate the optimal workspace using query with lwork=-1 */
1374     lwork = -1;
1375     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1376     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1377     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1378     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1379 #if !defined(PETSC_USE_COMPLEX)
1380     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1381 #else
1382     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1383 #endif
1384     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1385     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1386 #endif /* on missing GESVD */
1387     /* Allocate optimal workspace */
1388     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1389     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1390   }
1391   /* Now we can loop on constraining sets */
1392   total_counts = 0;
1393   temp_indices[0] = 0;
1394   /* vertices */
1395   if (ISForVertices) {
1396     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1397     if (nnsp_has_cnst) { /* consider all vertices */
1398       for (i=0;i<n_vertices;i++) {
1399         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1400         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1401         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1402         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1403         change_basis[total_counts]=PETSC_FALSE;
1404         total_counts++;
1405       }
1406     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1407       PetscBool used_vertex;
1408       for (i=0;i<n_vertices;i++) {
1409         used_vertex = PETSC_FALSE;
1410         k = 0;
1411         while (!used_vertex && k<nnsp_size) {
1412           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1413           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1414             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1415             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1416             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1417             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1418             change_basis[total_counts]=PETSC_FALSE;
1419             total_counts++;
1420             used_vertex = PETSC_TRUE;
1421           }
1422           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1423           k++;
1424         }
1425       }
1426     }
1427     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1428     n_vertices = total_counts;
1429   }
1430 
1431   /* edges and faces */
1432   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1433     if (i<n_ISForEdges) {
1434       used_IS = &ISForEdges[i];
1435       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1436     } else {
1437       used_IS = &ISForFaces[i-n_ISForEdges];
1438       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1439     }
1440     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1441     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1442     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1443     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1444     /* change of basis should not be performed on local periodic nodes */
1445     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1446     if (nnsp_has_cnst) {
1447       PetscScalar quad_value;
1448       temp_constraints++;
1449       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1450       for (j=0;j<size_of_constraint;j++) {
1451         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1452         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1453         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1454       }
1455       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1456       change_basis[total_counts]=boolforchange;
1457       total_counts++;
1458     }
1459     for (k=0;k<nnsp_size;k++) {
1460       PetscReal real_value;
1461       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1462       for (j=0;j<size_of_constraint;j++) {
1463         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1464         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1465         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1466       }
1467       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1468       /* check if array is null on the connected component */
1469       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1470       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1471       if (real_value > 0.0) { /* keep indices and values */
1472         temp_constraints++;
1473         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1474         change_basis[total_counts]=boolforchange;
1475         total_counts++;
1476       }
1477     }
1478     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1479     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1480     if (!pcbddc->use_nnsp_true) {
1481       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1482 
1483 #if defined(PETSC_MISSING_LAPACK_GESVD)
1484       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1485          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1486          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1487             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1488             from that computed using LAPACKgesvd
1489          -> This is due to a different computation of eigenvectors in LAPACKheev
1490          -> The quality of the POD-computed basis will be the same */
1491       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1492       /* Store upper triangular part of correlation matrix */
1493       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1494       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1495       for (j=0;j<temp_constraints;j++) {
1496         for (k=0;k<j+1;k++) {
1497           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1498         }
1499       }
1500       /* compute eigenvalues and eigenvectors of correlation matrix */
1501       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1502       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1503 #if !defined(PETSC_USE_COMPLEX)
1504       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1505 #else
1506       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1507 #endif
1508       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1509       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1510       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1511       j=0;
1512       while (j < temp_constraints && singular_vals[j] < tol) j++;
1513       total_counts=total_counts-j;
1514       /* scale and copy POD basis into used quadrature memory */
1515       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1516       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1517       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1518       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1519       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1520       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1521       if (j<temp_constraints) {
1522         PetscInt ii;
1523         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1524         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1525         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1526         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1527         for (k=0;k<temp_constraints-j;k++) {
1528           for (ii=0;ii<size_of_constraint;ii++) {
1529             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1530           }
1531         }
1532       }
1533 #else  /* on missing GESVD */
1534       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1535       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1536       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1537       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1538 #if !defined(PETSC_USE_COMPLEX)
1539       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1540 #else
1541       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1542 #endif
1543       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1544       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1545       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1546       k = temp_constraints;
1547       if (k > size_of_constraint) k = size_of_constraint;
1548       j = 0;
1549       while (j < k && singular_vals[k-j-1] < tol) j++;
1550       total_counts = total_counts-temp_constraints+k-j;
1551 #endif /* on missing GESVD */
1552     }
1553   }
1554   /* free index sets of faces, edges and vertices */
1555   for (i=0;i<n_ISForFaces;i++) {
1556     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1559   for (i=0;i<n_ISForEdges;i++) {
1560     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1561   }
1562   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1563   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1564 
1565   /* free workspace */
1566   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1567     ierr = PetscFree(work);CHKERRQ(ierr);
1568 #if defined(PETSC_USE_COMPLEX)
1569     ierr = PetscFree(rwork);CHKERRQ(ierr);
1570 #endif
1571     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1572 #if defined(PETSC_MISSING_LAPACK_GESVD)
1573     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1574     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1575 #endif
1576   }
1577   for (k=0;k<nnsp_size;k++) {
1578     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1579   }
1580   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1581 
1582   /* set quantities in pcbddc data structure */
1583   /* n_vertices defines the number of subdomain corners in the primal space */
1584   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1585   pcbddc->local_primal_size = total_counts;
1586   pcbddc->n_vertices = n_vertices;
1587   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1588 
1589   /* Create constraint matrix */
1590   /* The constraint matrix is used to compute the l2g map of primal dofs */
1591   /* so we need to set it up properly either with or without change of basis */
1592   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1593   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1594   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1595   /* array to compute a local numbering of constraints : vertices first then constraints */
1596   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1597   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1598   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1599   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1600   /* auxiliary stuff for basis change */
1601   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1602   ierr = PetscMalloc(pcis->n_B*sizeof(PetscBool),&touched);CHKERRQ(ierr);
1603   ierr = PetscMemzero(touched,pcis->n_B*sizeof(PetscBool));CHKERRQ(ierr);
1604 
1605   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1606   total_primal_vertices=0;
1607   for (i=0;i<pcbddc->local_primal_size;i++) {
1608     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1609     if (size_of_constraint == 1) {
1610       touched[temp_indices_to_constraint_B[temp_indices[i]]]=PETSC_TRUE;
1611       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1612       aux_primal_minloc[total_primal_vertices]=0;
1613       total_primal_vertices++;
1614     } else if (change_basis[i]) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1615       PetscInt min_loc,min_index;
1616       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1617       /* find first untouched local node */
1618       k = 0;
1619       while (touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) k++;
1620       min_index = global_indices[k];
1621       min_loc = k;
1622       /* search the minimum among global nodes already untouched on the cc */
1623       for (k=1;k<size_of_constraint;k++) {
1624         /* there can be more than one constraint on a single connected component */
1625         if (min_index > global_indices[k] && !touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) {
1626           min_index = global_indices[k];
1627           min_loc = k;
1628         }
1629       }
1630       touched[temp_indices_to_constraint_B[temp_indices[i]+min_loc]] = PETSC_TRUE;
1631       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1632       aux_primal_minloc[total_primal_vertices]=min_loc;
1633       total_primal_vertices++;
1634     }
1635   }
1636   /* free workspace */
1637   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1638   ierr = PetscFree(touched);CHKERRQ(ierr);
1639   /* permute indices in order to have a sorted set of vertices */
1640   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1641 
1642   /* nonzero structure of constraint matrix */
1643   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1644   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1645   j=total_primal_vertices;
1646   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1647     if (!change_basis[i]) {
1648       nnz[j]=temp_indices[i+1]-temp_indices[i];
1649       j++;
1650     }
1651   }
1652   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1653   ierr = PetscFree(nnz);CHKERRQ(ierr);
1654   /* set values in constraint matrix */
1655   for (i=0;i<total_primal_vertices;i++) {
1656     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1657   }
1658   total_counts = total_primal_vertices;
1659   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1660     if (!change_basis[i]) {
1661       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1662       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1663       total_counts++;
1664     }
1665   }
1666   /* assembling */
1667   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   /*
1670   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1671   */
1672   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1673   if (pcbddc->use_change_of_basis) {
1674     PetscBool qr_needed = PETSC_FALSE;
1675     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1676     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1677     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1678     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1679     /* work arrays */
1680     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1681     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1682     /* nonzeros per row */
1683     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1684       if (change_basis[i]) {
1685         qr_needed = PETSC_TRUE;
1686         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1687         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1688       }
1689     }
1690     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1691     ierr = PetscFree(nnz);CHKERRQ(ierr);
1692     /* Set initial identity in the matrix */
1693     for (i=0;i<pcis->n_B;i++) {
1694       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1695     }
1696 
1697     /* Now we loop on the constraints which need a change of basis */
1698     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1699     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1700     /* Change of basis matrix T computed via QR decomposition of constraints */
1701     if (qr_needed) {
1702       /* dual and primal dofs on a single cc */
1703       PetscInt     dual_dofs,primal_dofs;
1704       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1705       PetscInt     primal_counter;
1706       /* working stuff for GEQRF */
1707       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1708       PetscBLASInt lqr_work;
1709       /* working stuff for UNGQR */
1710       PetscScalar  *gqr_work,lgqr_work_t;
1711       PetscBLASInt lgqr_work;
1712       /* working stuff for TRTRS */
1713       PetscScalar  *trs_rhs;
1714       PetscBLASInt Blas_NRHS;
1715       /* pointers for values insertion into change of basis matrix */
1716       PetscInt     *start_rows,*start_cols;
1717       PetscScalar  *start_vals;
1718       /* working stuff for values insertion */
1719       PetscBool    *is_primal;
1720 
1721       /* space to store Q */
1722       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1723       /* first we issue queries for optimal work */
1724       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1725       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1726       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1727       lqr_work = -1;
1728       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1729       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1730       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1731       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1732       lgqr_work = -1;
1733       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1734       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1735       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1736       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1737       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1738       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1739       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1740       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1741       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1742       /* array to store scaling factors for reflectors */
1743       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1744       /* array to store rhs and solution of triangular solver */
1745       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1746       /* array to store whether a node is primal or not */
1747       ierr = PetscMalloc(pcis->n_B*sizeof(*is_primal),&is_primal);CHKERRQ(ierr);
1748       ierr = PetscMemzero(is_primal,pcis->n_B*sizeof(*is_primal));CHKERRQ(ierr);
1749       for (i=0;i<total_primal_vertices;i++) is_primal[local_to_B[aux_primal_numbering[i]]] = PETSC_TRUE;
1750 
1751       /* allocating workspace for check */
1752       if (pcbddc->dbg_flag) {
1753         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1754         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1755         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1756       }
1757 
1758       /* loop on constraints and see whether or not they need a change of basis */
1759       /* -> using implicit ordering contained in temp_indices data */
1760       total_counts = pcbddc->n_vertices;
1761       primal_counter = total_counts;
1762       while (total_counts<pcbddc->local_primal_size) {
1763         primal_dofs = 1;
1764         if (change_basis[total_counts]) {
1765           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1766           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1767             primal_dofs++;
1768           }
1769           /* get constraint info */
1770           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1771           dual_dofs = size_of_constraint-primal_dofs;
1772 
1773           /* copy quadrature constraints for change of basis check */
1774           if (pcbddc->dbg_flag) {
1775             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1776             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1777           }
1778 
1779           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1780           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1781 
1782           /* compute QR decomposition of constraints */
1783           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1784           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1785           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1786           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1787           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1788           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1789           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1790 
1791           /* explictly compute R^-T */
1792           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1793           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1794           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1795           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1796           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1797           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1798           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1799           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1800           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1801           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1802 
1803           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1804           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1805           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1806           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1807           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1808           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1809           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1810           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1811           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1812 
1813           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1814              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1815              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1816           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1817           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1818           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1819           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1820           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1821           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1822           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1823           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1824           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1825           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1826 
1827           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1828           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1829           /* insert cols for primal dofs */
1830           for (j=0;j<primal_dofs;j++) {
1831             start_vals = &qr_basis[j*size_of_constraint];
1832             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1833             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1834           }
1835           /* insert cols for dual dofs */
1836           for (j=0,k=0;j<dual_dofs;k++) {
1837             if (!is_primal[temp_indices_to_constraint_B[temp_indices[total_counts]+k]]) {
1838               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1839               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1840               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1841               j++;
1842             }
1843           }
1844 
1845           /* check change of basis */
1846           if (pcbddc->dbg_flag) {
1847             PetscInt   ii,jj;
1848             PetscBool valid_qr=PETSC_TRUE;
1849             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1850             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1851             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1852             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1853             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1854             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1855             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1856             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1857             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1858             for (jj=0;jj<size_of_constraint;jj++) {
1859               for (ii=0;ii<primal_dofs;ii++) {
1860                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1861                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1862               }
1863             }
1864             if (!valid_qr) {
1865               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1866               for (jj=0;jj<size_of_constraint;jj++) {
1867                 for (ii=0;ii<primal_dofs;ii++) {
1868                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1869                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1870                   }
1871                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1872                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1873                   }
1874                 }
1875               }
1876             } else {
1877               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1878             }
1879           }
1880           /* increment primal counter */
1881           primal_counter += primal_dofs;
1882         } else {
1883           if (pcbddc->dbg_flag) {
1884             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1885           }
1886         }
1887         /* increment constraint counter total_counts */
1888         total_counts += primal_dofs;
1889       }
1890       if (pcbddc->dbg_flag) {
1891         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1892         ierr = PetscFree(work);CHKERRQ(ierr);
1893       }
1894       /* free workspace */
1895       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1896       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1897       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1898       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1899       ierr = PetscFree(is_primal);CHKERRQ(ierr);
1900       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1901     }
1902     /* assembling */
1903     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905     /*
1906     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1907     */
1908   }
1909   /* free workspace */
1910   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1911   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1912   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1913   ierr = PetscFree(change_basis);CHKERRQ(ierr);
1914   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1915   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1916   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1917   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1923 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1924 {
1925   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1926   PC_IS       *pcis = (PC_IS*)pc->data;
1927   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1928   PetscInt    bs,ierr,i,vertex_size;
1929   PetscViewer viewer=pcbddc->dbg_viewer;
1930 
1931   PetscFunctionBegin;
1932   /* Init local Graph struct */
1933   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1934 
1935   /* Check validity of the csr graph passed in by the user */
1936   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1937     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1938   }
1939   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1940   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1941     Mat mat_adj;
1942     const PetscInt *xadj,*adjncy;
1943     PetscBool flg_row=PETSC_TRUE;
1944 
1945     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1946     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1947     if (!flg_row) {
1948       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1949     }
1950     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1951     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1952     if (!flg_row) {
1953       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1954     }
1955     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1956   }
1957 
1958   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1959   vertex_size = 1;
1960   if (!pcbddc->n_ISForDofs) {
1961     IS *custom_ISForDofs;
1962 
1963     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1964     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1965     for (i=0;i<bs;i++) {
1966       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1967     }
1968     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1969     /* remove my references to IS objects */
1970     for (i=0;i<bs;i++) {
1971       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1972     }
1973     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1974   } else { /* mat block size as vertex size (used for elasticity) */
1975     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1976   }
1977 
1978   /* Setup of Graph */
1979   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1980 
1981   /* Graph's connected components analysis */
1982   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1983 
1984   /* print some info to stdout */
1985   if (pcbddc->dbg_flag) {
1986     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1993 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[])
1994 {
1995   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1996   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   n = 0;
2001   vertices = 0;
2002   if (pcbddc->ConstraintMatrix) {
2003     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2004     for (i=0;i<local_primal_size;i++) {
2005       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2006       if (size_of_constraint == 1) n++;
2007       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2008     }
2009     if (vertices_idx) {
2010       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2011       n = 0;
2012       for (i=0;i<local_primal_size;i++) {
2013         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2014         if (size_of_constraint == 1) {
2015           vertices[n++]=row_cmat_indices[0];
2016         }
2017         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2018       }
2019     }
2020   }
2021   *n_vertices = n;
2022   if (vertices_idx) *vertices_idx = vertices;
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2028 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[])
2029 {
2030   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2031   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2032   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2033   PetscBool      *touched;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   n = 0;
2038   constraints_index = 0;
2039   if (pcbddc->ConstraintMatrix) {
2040     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2041     max_size_of_constraint = 0;
2042     for (i=0;i<local_primal_size;i++) {
2043       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2044       if (size_of_constraint > 1) {
2045         n++;
2046       }
2047       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2048       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2049     }
2050     if (constraints_idx) {
2051       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2052       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2053       ierr = PetscMalloc(local_size*sizeof(PetscBool),&touched);CHKERRQ(ierr);
2054       ierr = PetscMemzero(touched,local_size*sizeof(PetscBool));CHKERRQ(ierr);
2055       n = 0;
2056       for (i=0;i<local_primal_size;i++) {
2057         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2058         if (size_of_constraint > 1) {
2059           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2060           /* find first untouched local node */
2061           j = 0;
2062           while(touched[row_cmat_indices[j]]) j++;
2063           min_index = row_cmat_global_indices[j];
2064           min_loc = j;
2065           /* search the minimum among nodes not yet touched on the connected component
2066              since there can be more than one constraint on a single cc */
2067           for (j=1;j<size_of_constraint;j++) {
2068             if (min_index > row_cmat_global_indices[j] && !touched[row_cmat_indices[j]]) {
2069               min_index = row_cmat_global_indices[j];
2070               min_loc = j;
2071             }
2072           }
2073           touched[row_cmat_indices[min_loc]] = PETSC_TRUE;
2074           constraints_index[n++] = row_cmat_indices[min_loc];
2075         }
2076         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2077       }
2078       ierr = PetscFree(touched);CHKERRQ(ierr);
2079       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2080     }
2081   }
2082   *n_constraints = n;
2083   if (constraints_idx) *constraints_idx = constraints_index;
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 /* the next two functions has been adapted from pcis.c */
2088 #undef __FUNCT__
2089 #define __FUNCT__ "PCBDDCApplySchur"
2090 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2091 {
2092   PetscErrorCode ierr;
2093   PC_IS          *pcis = (PC_IS*)(pc->data);
2094 
2095   PetscFunctionBegin;
2096   if (!vec2_B) { vec2_B = v; }
2097   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2098   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2099   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2100   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2101   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2107 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2108 {
2109   PetscErrorCode ierr;
2110   PC_IS          *pcis = (PC_IS*)(pc->data);
2111 
2112   PetscFunctionBegin;
2113   if (!vec2_B) { vec2_B = v; }
2114   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2115   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2116   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2117   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2118   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "PCBDDCSubsetNumbering"
2124 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2125 {
2126   Vec            local_vec,global_vec;
2127   IS             seqis,paris;
2128   VecScatter     scatter_ctx;
2129   PetscScalar    *array;
2130   PetscInt       *temp_global_dofs;
2131   PetscScalar    globalsum;
2132   PetscInt       i,j,s;
2133   PetscInt       nlocals,first_index,old_index,max_local;
2134   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2135   PetscMPIInt    *dof_sizes,*dof_displs;
2136   PetscBool      first_found;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   /* mpi buffers */
2141   MPI_Comm_size(comm,&size_prec_comm);
2142   MPI_Comm_rank(comm,&rank_prec_comm);
2143   j = ( !rank_prec_comm ? size_prec_comm : 0);
2144   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2145   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2146   /* get maximum size of subset */
2147   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2148   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2149   max_local = 0;
2150   if (n_local_dofs) {
2151     max_local = temp_global_dofs[0];
2152     for (i=1;i<n_local_dofs;i++) {
2153       if (max_local < temp_global_dofs[i] ) {
2154         max_local = temp_global_dofs[i];
2155       }
2156     }
2157   }
2158   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2159   max_global++;
2160   max_local = 0;
2161   if (n_local_dofs) {
2162     max_local = local_dofs[0];
2163     for (i=1;i<n_local_dofs;i++) {
2164       if (max_local < local_dofs[i] ) {
2165         max_local = local_dofs[i];
2166       }
2167     }
2168   }
2169   max_local++;
2170   /* allocate workspace */
2171   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2172   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2173   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2174   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2175   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2176   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2177   /* create scatter */
2178   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2179   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2180   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2181   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2182   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2183   /* init array */
2184   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2185   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2186   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2187   if (local_dofs_mult) {
2188     for (i=0;i<n_local_dofs;i++) {
2189       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2190     }
2191   } else {
2192     for (i=0;i<n_local_dofs;i++) {
2193       array[local_dofs[i]]=1.0;
2194     }
2195   }
2196   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2197   /* scatter into global vec and get total number of global dofs */
2198   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2199   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2200   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2201   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2202   /* Fill global_vec with cumulative function for global numbering */
2203   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2204   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2205   nlocals = 0;
2206   first_index = -1;
2207   first_found = PETSC_FALSE;
2208   for (i=0;i<s;i++) {
2209     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2210       first_found = PETSC_TRUE;
2211       first_index = i;
2212     }
2213     nlocals += (PetscInt)PetscRealPart(array[i]);
2214   }
2215   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2216   if (!rank_prec_comm) {
2217     dof_displs[0]=0;
2218     for (i=1;i<size_prec_comm;i++) {
2219       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2220     }
2221   }
2222   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2223   if (first_found) {
2224     array[first_index] += (PetscScalar)nlocals;
2225     old_index = first_index;
2226     for (i=first_index+1;i<s;i++) {
2227       if (PetscRealPart(array[i]) > 0.0) {
2228         array[i] += array[old_index];
2229         old_index = i;
2230       }
2231     }
2232   }
2233   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2234   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2235   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2236   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2237   /* get global ordering of local dofs */
2238   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2239   if (local_dofs_mult) {
2240     for (i=0;i<n_local_dofs;i++) {
2241       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2242     }
2243   } else {
2244     for (i=0;i<n_local_dofs;i++) {
2245       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2246     }
2247   }
2248   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2249   /* free workspace */
2250   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2251   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2252   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2253   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2254   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2255   /* return pointer to global ordering of local dofs */
2256   *global_numbering_subset = temp_global_dofs;
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2262 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2263 {
2264   PetscInt       i,j;
2265   PetscScalar    *alphas;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   /* this implements stabilized Gram-Schmidt */
2270   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2271   for (i=0;i<n;i++) {
2272     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2273     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2274     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2275   }
2276   ierr = PetscFree(alphas);CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 /* BDDC requires metis 5.0.1 for multilevel */
2281 #if defined(PETSC_HAVE_METIS)
2282 #include "metis.h"
2283 #define MetisInt    idx_t
2284 #define MetisScalar real_t
2285 #endif
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment"
2289 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
2290 {
2291 
2292 
2293   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
2294   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2295   PC_IS     *pcis     = (PC_IS*)pc->data;
2296   MPI_Comm  prec_comm;
2297   MPI_Comm  coarse_comm;
2298 
2299   MatNullSpace CoarseNullSpace;
2300 
2301   /* common to all choiches */
2302   PetscScalar *temp_coarse_mat_vals;
2303   PetscScalar *ins_coarse_mat_vals;
2304   PetscInt    *ins_local_primal_indices;
2305   PetscMPIInt *localsizes2,*localdispl2;
2306   PetscMPIInt size_prec_comm;
2307   PetscMPIInt rank_prec_comm;
2308   PetscMPIInt active_rank=MPI_PROC_NULL;
2309   PetscMPIInt master_proc=0;
2310   PetscInt    ins_local_primal_size;
2311   /* specific to MULTILEVEL_BDDC */
2312   PetscMPIInt *ranks_recv=0;
2313   PetscMPIInt count_recv=0;
2314   PetscMPIInt rank_coarse_proc_send_to=-1;
2315   PetscMPIInt coarse_color = MPI_UNDEFINED;
2316   ISLocalToGlobalMapping coarse_ISLG;
2317   /* some other variables */
2318   PetscErrorCode ierr;
2319   MatType coarse_mat_type;
2320   PCType  coarse_pc_type;
2321   KSPType coarse_ksp_type;
2322   PC pc_temp;
2323   PetscInt i,j,k;
2324   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2325   /* verbose output viewer */
2326   PetscViewer viewer=pcbddc->dbg_viewer;
2327   PetscInt    dbg_flag=pcbddc->dbg_flag;
2328 
2329   PetscInt      offset,offset2;
2330   PetscMPIInt   im_active,active_procs;
2331   PetscInt      *dnz,*onz;
2332 
2333   PetscBool     setsym,issym=PETSC_FALSE;
2334 
2335   PetscFunctionBegin;
2336   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2337   ins_local_primal_indices = 0;
2338   ins_coarse_mat_vals      = 0;
2339   localsizes2              = 0;
2340   localdispl2              = 0;
2341   temp_coarse_mat_vals     = 0;
2342   coarse_ISLG              = 0;
2343 
2344   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2345   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2346   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2347 
2348   /* Assign global numbering to coarse dofs */
2349   {
2350     PetscInt     *auxlocal_primal,*aux_idx;
2351     PetscMPIInt  mpi_local_primal_size;
2352     PetscScalar  coarsesum,*array;
2353 
2354     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
2355 
2356     /* Construct needed data structures for message passing */
2357     j = 0;
2358     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2359       j = size_prec_comm;
2360     }
2361     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
2362     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2363     /* Gather local_primal_size information for all processes  */
2364     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2365       ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2366     } else {
2367       ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2368     }
2369     pcbddc->replicated_primal_size = 0;
2370     for (i=0; i<j; i++) {
2371       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
2372       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
2373     }
2374 
2375     /* First let's count coarse dofs.
2376        This code fragment assumes that the number of local constraints per connected component
2377        is not greater than the number of nodes defined for the connected component
2378        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2379     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2380     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2381     ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2382     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2383     ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2384     ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2385     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2386     /* Compute number of coarse dofs */
2387     ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&pcbddc->coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr);
2388 
2389     if (dbg_flag) {
2390       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2391       ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2392       ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr);
2393       ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2394       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2395       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
2396       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2397       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2398       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2399       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2400       ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2401       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2402       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2403       for (i=0;i<pcis->n;i++) {
2404         if (array[i] == 1.0) {
2405           ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr);
2406           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr);
2407         }
2408       }
2409       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2410       for (i=0;i<pcis->n;i++) {
2411         if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2412       }
2413       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2414       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2415       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2416       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2417       ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2418       ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr);
2419       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2420     }
2421     ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
2422   }
2423 
2424   if (dbg_flag) {
2425     ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",pcbddc->coarse_size);CHKERRQ(ierr);
2426     if (dbg_flag > 1) {
2427       ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2428       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2429       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2430       for (i=0;i<pcbddc->local_primal_size;i++) {
2431         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
2432       }
2433       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2434     }
2435   }
2436 
2437   im_active = 0;
2438   if (pcis->n) im_active = 1;
2439   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2440 
2441   /* adapt coarse problem type */
2442 #if defined(PETSC_HAVE_METIS)
2443   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2444     if (pcbddc->current_level < pcbddc->max_levels) {
2445       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2446         if (dbg_flag) {
2447           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2448          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2449         }
2450         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2451       }
2452     } else {
2453       if (dbg_flag) {
2454         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2455         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2456       }
2457       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2458     }
2459   }
2460 #else
2461   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2462 #endif
2463 
2464   switch(pcbddc->coarse_problem_type){
2465 
2466     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2467     {
2468 #if defined(PETSC_HAVE_METIS)
2469       /* we need additional variables */
2470       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2471       MetisInt    *metis_coarse_subdivision;
2472       MetisInt    options[METIS_NOPTIONS];
2473       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2474       PetscMPIInt procs_jumps_coarse_comm;
2475       PetscMPIInt *coarse_subdivision;
2476       PetscMPIInt *total_count_recv;
2477       PetscMPIInt *total_ranks_recv;
2478       PetscMPIInt *displacements_recv;
2479       PetscMPIInt *my_faces_connectivity;
2480       PetscMPIInt *petsc_faces_adjncy;
2481       MetisInt    *faces_adjncy;
2482       MetisInt    *faces_xadj;
2483       PetscMPIInt *number_of_faces;
2484       PetscMPIInt *faces_displacements;
2485       PetscInt    *array_int;
2486       PetscMPIInt my_faces=0;
2487       PetscMPIInt total_faces=0;
2488       PetscInt    ranks_stretching_ratio;
2489 
2490       /* define some quantities */
2491       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2492       coarse_mat_type = MATIS;
2493       coarse_pc_type  = PCBDDC;
2494       coarse_ksp_type = KSPRICHARDSON;
2495 
2496       /* details of coarse decomposition */
2497       n_subdomains = active_procs;
2498       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2499       ranks_stretching_ratio = size_prec_comm/active_procs;
2500       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2501 
2502 #if 0
2503       PetscMPIInt *old_ranks;
2504       PetscInt    *new_ranks,*jj,*ii;
2505       MatPartitioning mat_part;
2506       IS coarse_new_decomposition,is_numbering;
2507       PetscViewer viewer_test;
2508       MPI_Comm    test_coarse_comm;
2509       PetscMPIInt test_coarse_color;
2510       Mat         mat_adj;
2511       /* Create new communicator for coarse problem splitting the old one */
2512       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2513          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2514       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2515       test_coarse_comm = MPI_COMM_NULL;
2516       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2517       if (im_active) {
2518         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2519         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2520         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2521         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2522         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2523         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2524         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2525         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2526         k = pcis->n_neigh-1;
2527         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2528         ii[0]=0;
2529         ii[1]=k;
2530         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2531         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2532         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2533         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2534         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2535         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2536         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2537         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2538         printf("Setting Nparts %d\n",n_parts);
2539         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2540         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2541         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2542         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2543         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2544         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2545         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2546         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2547         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2548         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2549         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2550         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2551         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2552       }
2553 #endif
2554 
2555       /* build CSR graph of subdomains' connectivity */
2556       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2557       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2558       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2559         for (j=0;j<pcis->n_shared[i];j++){
2560           array_int[ pcis->shared[i][j] ]+=1;
2561         }
2562       }
2563       for (i=1;i<pcis->n_neigh;i++){
2564         for (j=0;j<pcis->n_shared[i];j++){
2565           if (array_int[ pcis->shared[i][j] ] > 0 ){
2566             my_faces++;
2567             break;
2568           }
2569         }
2570       }
2571 
2572       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2573       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2574       my_faces=0;
2575       for (i=1;i<pcis->n_neigh;i++){
2576         for (j=0;j<pcis->n_shared[i];j++){
2577           if (array_int[ pcis->shared[i][j] ] > 0 ){
2578             my_faces_connectivity[my_faces]=pcis->neigh[i];
2579             my_faces++;
2580             break;
2581           }
2582         }
2583       }
2584       if (rank_prec_comm == master_proc) {
2585         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2586         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2587         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2588         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2589         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2590       }
2591       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2592       if (rank_prec_comm == master_proc) {
2593         faces_xadj[0]=0;
2594         faces_displacements[0]=0;
2595         j=0;
2596         for (i=1;i<size_prec_comm+1;i++) {
2597           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2598           if (number_of_faces[i-1]) {
2599             j++;
2600             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2601           }
2602         }
2603       }
2604       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2605       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2606       ierr = PetscFree(array_int);CHKERRQ(ierr);
2607       if (rank_prec_comm == master_proc) {
2608         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2609         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2610         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2611         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2612       }
2613 
2614       if ( rank_prec_comm == master_proc ) {
2615 
2616         PetscInt heuristic_for_metis=3;
2617 
2618         ncon=1;
2619         faces_nvtxs=n_subdomains;
2620         /* partition graoh induced by face connectivity */
2621         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2622         ierr = METIS_SetDefaultOptions(options);
2623         /* we need a contiguous partition of the coarse mesh */
2624         options[METIS_OPTION_CONTIG]=1;
2625         options[METIS_OPTION_NITER]=30;
2626         if (pcbddc->coarsening_ratio > 1) {
2627           if (n_subdomains>n_parts*heuristic_for_metis) {
2628             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2629             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2630             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2631             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2632           } else {
2633             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2634             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2635           }
2636         } else {
2637           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2638         }
2639         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2640         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2641         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2642 
2643         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2644         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2645         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2646         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2647       }
2648 
2649       /* Create new communicator for coarse problem splitting the old one */
2650       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2651         coarse_color=0;              /* for communicator splitting */
2652         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2653       }
2654       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2655          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2656       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2657 
2658       if ( coarse_color == 0 ) {
2659         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2660         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2661       } else {
2662         rank_coarse_comm = MPI_PROC_NULL;
2663       }
2664 
2665       /* master proc take care of arranging and distributing coarse information */
2666       if (rank_coarse_comm == master_proc) {
2667         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2668         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2669         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2670         /* some initializations */
2671         displacements_recv[0]=0;
2672         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2673         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2674         for (j=0;j<size_coarse_comm;j++) {
2675           for (i=0;i<size_prec_comm;i++) {
2676           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2677           }
2678         }
2679         /* displacements needed for scatterv of total_ranks_recv */
2680       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2681 
2682         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2683         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2684         for (j=0;j<size_coarse_comm;j++) {
2685           for (i=0;i<size_prec_comm;i++) {
2686             if (coarse_subdivision[i]==j) {
2687               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2688               total_count_recv[j]+=1;
2689             }
2690           }
2691         }
2692         /*for (j=0;j<size_coarse_comm;j++) {
2693           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2694           for (i=0;i<total_count_recv[j];i++) {
2695             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2696           }
2697           printf("\n");
2698         }*/
2699 
2700         /* identify new decomposition in terms of ranks in the old communicator */
2701         for (i=0;i<n_subdomains;i++) {
2702           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2703         }
2704         /*printf("coarse_subdivision in old end new ranks\n");
2705         for (i=0;i<size_prec_comm;i++)
2706           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2707             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2708           } else {
2709             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2710           }
2711         printf("\n");*/
2712       }
2713 
2714       /* Scatter new decomposition for send details */
2715       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2716       /* Scatter receiving details to members of coarse decomposition */
2717       if ( coarse_color == 0) {
2718         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2719         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2720         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2721       }
2722 
2723       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2724       if (coarse_color == 0) {
2725         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2726         for (i=0;i<count_recv;i++)
2727           printf("%d ",ranks_recv[i]);
2728         printf("\n");
2729       }*/
2730 
2731       if (rank_prec_comm == master_proc) {
2732         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2733         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2734         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2735         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2736       }
2737 #endif
2738       break;
2739     }
2740 
2741     case(REPLICATED_BDDC):
2742 
2743       pcbddc->coarse_communications_type = GATHERS_BDDC;
2744       coarse_mat_type = MATSEQAIJ;
2745       coarse_pc_type  = PCLU;
2746       coarse_ksp_type  = KSPPREONLY;
2747       coarse_comm = PETSC_COMM_SELF;
2748       active_rank = rank_prec_comm;
2749       break;
2750 
2751     case(PARALLEL_BDDC):
2752 
2753       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2754       coarse_mat_type = MATAIJ;
2755       coarse_pc_type  = PCREDUNDANT;
2756       coarse_ksp_type  = KSPPREONLY;
2757       coarse_comm = prec_comm;
2758       active_rank = rank_prec_comm;
2759       break;
2760 
2761     case(SEQUENTIAL_BDDC):
2762       pcbddc->coarse_communications_type = GATHERS_BDDC;
2763       coarse_mat_type = MATAIJ;
2764       coarse_pc_type = PCLU;
2765       coarse_ksp_type  = KSPPREONLY;
2766       coarse_comm = PETSC_COMM_SELF;
2767       active_rank = master_proc;
2768       break;
2769   }
2770 
2771   switch(pcbddc->coarse_communications_type){
2772 
2773     case(SCATTERS_BDDC):
2774       {
2775         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2776 
2777           IS coarse_IS;
2778 
2779           if(pcbddc->coarsening_ratio == 1) {
2780             ins_local_primal_size = pcbddc->local_primal_size;
2781             ins_local_primal_indices = pcbddc->local_primal_indices;
2782             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2783             /* nonzeros */
2784             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2785             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2786             for (i=0;i<ins_local_primal_size;i++) {
2787               dnz[i] = ins_local_primal_size;
2788             }
2789           } else {
2790             PetscMPIInt send_size;
2791             PetscMPIInt *send_buffer;
2792             PetscInt    *aux_ins_indices;
2793             PetscInt    ii,jj;
2794             MPI_Request *requests;
2795 
2796             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2797             /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
2798             ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
2799             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2800             pcbddc->replicated_primal_size = count_recv;
2801             j = 0;
2802             for (i=0;i<count_recv;i++) {
2803               pcbddc->local_primal_displacements[i] = j;
2804               j += pcbddc->local_primal_sizes[ranks_recv[i]];
2805             }
2806             pcbddc->local_primal_displacements[count_recv] = j;
2807             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2808             /* allocate auxiliary space */
2809             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2810             ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2811             ierr = PetscMemzero(aux_ins_indices,pcbddc->coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2812             /* allocate stuffs for message massing */
2813             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2814             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2815             /* send indices to be inserted */
2816             for (i=0;i<count_recv;i++) {
2817               send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
2818               ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2819             }
2820             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2821               send_size = pcbddc->local_primal_size;
2822               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2823               for (i=0;i<send_size;i++) {
2824                 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2825               }
2826               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2827             }
2828             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2829             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2830               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2831             }
2832             j = 0;
2833             for (i=0;i<count_recv;i++) {
2834               ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
2835               localsizes2[i] = ii*ii;
2836               localdispl2[i] = j;
2837               j += localsizes2[i];
2838               jj = pcbddc->local_primal_displacements[i];
2839               /* it counts the coarse subdomains sharing the coarse node */
2840               for (k=0;k<ii;k++) {
2841                 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
2842               }
2843             }
2844             /* temp_coarse_mat_vals used to store matrix values to be received */
2845             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2846             /* evaluate how many values I will insert in coarse mat */
2847             ins_local_primal_size = 0;
2848             for (i=0;i<pcbddc->coarse_size;i++) {
2849               if (aux_ins_indices[i]) {
2850                 ins_local_primal_size++;
2851               }
2852             }
2853             /* evaluate indices I will insert in coarse mat */
2854             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2855             j = 0;
2856             for(i=0;i<pcbddc->coarse_size;i++) {
2857               if(aux_ins_indices[i]) {
2858                 ins_local_primal_indices[j] = i;
2859                 j++;
2860               }
2861             }
2862             /* processes partecipating in coarse problem receive matrix data from their friends */
2863             for (i=0;i<count_recv;i++) {
2864               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2865             }
2866             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2867               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2868               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2869             }
2870             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2871             /* nonzeros */
2872             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2873             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2874             /* use aux_ins_indices to realize a global to local mapping */
2875             j=0;
2876             for(i=0;i<pcbddc->coarse_size;i++){
2877               if(aux_ins_indices[i]==0){
2878                 aux_ins_indices[i]=-1;
2879               } else {
2880                 aux_ins_indices[i]=j;
2881                 j++;
2882               }
2883             }
2884             for (i=0;i<count_recv;i++) {
2885               j = pcbddc->local_primal_sizes[ranks_recv[i]];
2886               for (k=0;k<j;k++) {
2887                 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
2888               }
2889             }
2890             /* check */
2891             for (i=0;i<ins_local_primal_size;i++) {
2892               if (dnz[i] > ins_local_primal_size) {
2893                 dnz[i] = ins_local_primal_size;
2894               }
2895             }
2896             ierr = PetscFree(requests);CHKERRQ(ierr);
2897             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2898             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2899           }
2900           /* create local to global mapping needed by coarse MATIS */
2901           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2902           coarse_comm = prec_comm;
2903           active_rank = rank_prec_comm;
2904           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2905           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2906           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2907         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2908           /* arrays for values insertion */
2909           ins_local_primal_size = pcbddc->local_primal_size;
2910           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2911           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2912           for (j=0;j<ins_local_primal_size;j++){
2913             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2914             for (i=0;i<ins_local_primal_size;i++) {
2915               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2916             }
2917           }
2918         }
2919         break;
2920 
2921     }
2922 
2923     case(GATHERS_BDDC):
2924       {
2925 
2926         PetscMPIInt mysize,mysize2;
2927         PetscMPIInt *send_buffer;
2928 
2929         if (rank_prec_comm==active_rank) {
2930           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2931           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
2932           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2933           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2934           /* arrays for values insertion */
2935       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2936           localdispl2[0]=0;
2937       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2938           j=0;
2939       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2940           ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2941         }
2942 
2943         mysize=pcbddc->local_primal_size;
2944         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2945         ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2946     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2947 
2948         if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2949           ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2950           ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
2951         } else {
2952           ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
2953           ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
2954         }
2955         ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2956         break;
2957       }/* switch on coarse problem and communications associated with finished */
2958   }
2959 
2960   /* Now create and fill up coarse matrix */
2961   if ( rank_prec_comm == active_rank ) {
2962 
2963     Mat matis_coarse_local_mat;
2964 
2965     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2966       ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
2967       ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size);CHKERRQ(ierr);
2968       ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2969       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2970       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2971       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2972       ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2973       ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2974     } else {
2975       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
2976       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2977       ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2978       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2979       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2980       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2981       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2982       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2983     }
2984     /* preallocation */
2985     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2986 
2987       PetscInt lrows,lcols,bs;
2988 
2989       ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2990       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2991       ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr);
2992 
2993       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2994 
2995         Vec         vec_dnz,vec_onz;
2996         PetscScalar *my_dnz,*my_onz,*array;
2997         PetscInt    *mat_ranges,*row_ownership;
2998         PetscInt    coarse_index_row,coarse_index_col,owner;
2999 
3000         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
3001         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
3002         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,pcbddc->coarse_size);CHKERRQ(ierr);
3003         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
3004         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
3005 
3006         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
3007         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
3008         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3009         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3010 
3011         ierr = PetscMalloc(pcbddc->coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
3012         ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
3013         for (i=0;i<size_prec_comm;i++) {
3014           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
3015             row_ownership[j]=i;
3016           }
3017         }
3018 
3019         for (i=0;i<pcbddc->local_primal_size;i++) {
3020           coarse_index_row = pcbddc->local_primal_indices[i];
3021           owner = row_ownership[coarse_index_row];
3022           for (j=i;j<pcbddc->local_primal_size;j++) {
3023             owner = row_ownership[coarse_index_row];
3024             coarse_index_col = pcbddc->local_primal_indices[j];
3025             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3026               my_dnz[i] += 1.0;
3027             } else {
3028               my_onz[i] += 1.0;
3029             }
3030             if (i != j) {
3031               owner = row_ownership[coarse_index_col];
3032               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3033                 my_dnz[j] += 1.0;
3034               } else {
3035                 my_onz[j] += 1.0;
3036               }
3037             }
3038           }
3039         }
3040         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3041         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3042         if (pcbddc->local_primal_size) {
3043           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3044           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3045         }
3046         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3047         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3048         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3049         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3050         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3051         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3052         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3053 
3054         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3055         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3056         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3057 
3058         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3059         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3060         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3061         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3062         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3063         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3064       } else {
3065         for (k=0;k<size_prec_comm;k++){
3066           offset=pcbddc->local_primal_displacements[k];
3067           offset2=localdispl2[k];
3068           ins_local_primal_size = pcbddc->local_primal_sizes[k];
3069           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3070           for (j=0;j<ins_local_primal_size;j++){
3071             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3072           }
3073           for (j=0;j<ins_local_primal_size;j++) {
3074             ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr);
3075           }
3076           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3077         }
3078       }
3079 
3080       /* check */
3081       for (i=0;i<lrows;i++) {
3082         if (dnz[i]>lcols) dnz[i]=lcols;
3083         if (onz[i]>pcbddc->coarse_size-lcols) onz[i]=pcbddc->coarse_size-lcols;
3084       }
3085       ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr);
3086       ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3087       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3088     } else {
3089       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3090       ierr = PetscFree(dnz);CHKERRQ(ierr);
3091     }
3092     /* insert values */
3093     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3094       ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3095     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3096       if (pcbddc->coarsening_ratio == 1) {
3097         ins_coarse_mat_vals = coarse_submat_vals;
3098         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3099       } else {
3100         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3101         for (k=0;k<pcbddc->replicated_primal_size;k++) {
3102           offset = pcbddc->local_primal_displacements[k];
3103           offset2 = localdispl2[k];
3104           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
3105           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3106           for (j=0;j<ins_local_primal_size;j++){
3107             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3108           }
3109           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3110           ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3111           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3112         }
3113       }
3114       ins_local_primal_indices = 0;
3115       ins_coarse_mat_vals = 0;
3116     } else {
3117       for (k=0;k<size_prec_comm;k++){
3118         offset=pcbddc->local_primal_displacements[k];
3119         offset2=localdispl2[k];
3120         ins_local_primal_size = pcbddc->local_primal_sizes[k];
3121         ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3122         for (j=0;j<ins_local_primal_size;j++){
3123           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3124         }
3125         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3126         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3127         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3128       }
3129       ins_local_primal_indices = 0;
3130       ins_coarse_mat_vals = 0;
3131     }
3132     ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133     ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134     /* symmetry of coarse matrix */
3135     if (issym) {
3136       ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3137     }
3138     ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3139   }
3140 
3141   /* create loc to glob scatters if needed */
3142   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3143      IS local_IS,global_IS;
3144      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3145      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3146      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3147      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3148      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3149   }
3150 
3151   /* free memory no longer needed */
3152   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3153   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3154   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3155   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3156   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3157   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3158 
3159   /* Compute coarse null space */
3160   CoarseNullSpace = 0;
3161   if (pcbddc->NullSpace) {
3162     ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr);
3163   }
3164 
3165   /* KSP for coarse problem */
3166   if (rank_prec_comm == active_rank) {
3167     PetscBool isbddc=PETSC_FALSE;
3168 
3169     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3170     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3171     ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3172     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3173     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3174     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3175     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3176     /* Allow user's customization */
3177     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3178     /* Set Up PC for coarse problem BDDC */
3179     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3180       i = pcbddc->current_level+1;
3181       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3182       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3183       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3184       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3185       if (CoarseNullSpace) {
3186         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3187       }
3188       if (dbg_flag) {
3189         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3190         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3191       }
3192     } else {
3193       if (CoarseNullSpace) {
3194         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3195       }
3196     }
3197     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3198     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3199 
3200     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3201     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3202     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3203     if (j == 1) {
3204       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3205       if (isbddc) {
3206         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3207       }
3208     }
3209   }
3210   /* Check coarse problem if requested */
3211   if ( dbg_flag && rank_prec_comm == active_rank ) {
3212     KSP check_ksp;
3213     PC  check_pc;
3214     Vec check_vec;
3215     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3216     KSPType check_ksp_type;
3217 
3218     /* Create ksp object suitable for extreme eigenvalues' estimation */
3219     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3220     ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3221     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3222     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3223       if (issym) check_ksp_type = KSPCG;
3224       else check_ksp_type = KSPGMRES;
3225       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3226     } else {
3227       check_ksp_type = KSPPREONLY;
3228     }
3229     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3230     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3231     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3232     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3233     /* create random vec */
3234     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3235     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3236     if (CoarseNullSpace) {
3237       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3238     }
3239     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3240     /* solve coarse problem */
3241     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3242     if (CoarseNullSpace) {
3243       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3244     }
3245     /* check coarse problem residual error */
3246     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3247     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3248     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3249     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3250     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3251     /* get eigenvalue estimation if inexact */
3252     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3253       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3254       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3255       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3256       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3257     }
3258     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3259     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3260     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3261   }
3262   if (dbg_flag) {
3263     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3264   }
3265   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3266 
3267   PetscFunctionReturn(0);
3268 }
3269 
3270