xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 983f5fd7f9a77d10cf7446deedabae668658a6b1)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
10   PetscErrorCode    ierr;
11 
12   PetscFunctionBegin;
13   /* Compute matrix after change of basis and extract local submatrices */
14   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
15 
16   /* Allocate needed vectors */
17   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
18 
19   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
20   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
21 
22   /* Setup local solvers ksp_D and ksp_R */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /* setup local correction and local part of coarse basis */
31   ierr = PCBDDCSetUpCoarseLocal(pc);CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 #undef __FUNCT__
36 #define __FUNCT__ "PCBDDCSetLevel"
37 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
38 {
39   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
40 
41   PetscFunctionBegin;
42   pcbddc->current_level=level;
43   PetscFunctionReturn(0);
44 }
45 
46 #undef __FUNCT__
47 #define __FUNCT__ "PCBDDCResetCustomization"
48 PetscErrorCode PCBDDCResetCustomization(PC pc)
49 {
50   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
51   PetscInt       i;
52   PetscErrorCode ierr;
53 
54   PetscFunctionBegin;
55   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
57   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
59   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
60   for (i=0;i<pcbddc->n_ISForDofs;i++) {
61     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
62   }
63   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
64   PetscFunctionReturn(0);
65 }
66 
67 #undef __FUNCT__
68 #define __FUNCT__ "PCBDDCResetTopography"
69 PetscErrorCode PCBDDCResetTopography(PC pc)
70 {
71   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
72   PetscErrorCode ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
76   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
77   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "PCBDDCResetSolvers"
83 PetscErrorCode PCBDDCResetSolvers(PC pc)
84 {
85   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
86   PetscErrorCode ierr;
87 
88   PetscFunctionBegin;
89   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
90   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
91   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
94   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
95   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
97   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
99   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
100   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
101   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
102   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
103   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
104   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
105   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
106   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
107   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
110   ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
111   ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
112   ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCBDDCCreateWorkVectors"
118 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
119 {
120   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
121   PC_IS          *pcis = (PC_IS*)pc->data;
122   VecType        impVecType;
123   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
124   PetscErrorCode ierr;
125 
126   PetscFunctionBegin;
127   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
128   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
129   local_primal_size = n_constraints+n_vertices;
130   n_R = pcis->n-n_vertices;
131   /* local work vectors */
132   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
133   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
134   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
135   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
136   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
141   if (n_constraints) {
142     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
143     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
144     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
145   }
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
151 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc)
152 {
153   PetscErrorCode         ierr;
154   /* pointers to pcis and pcbddc */
155   PC_IS*                 pcis = (PC_IS*)pc->data;
156   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
157   /* submatrices of local problem */
158   Mat                    A_RV,A_VR,A_VV;
159   /* working matrices */
160   Mat                    M1,M2,M3,C_CR;
161   /* working vectors */
162   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
163   /* additional working stuff */
164   IS                     is_aux;
165   ISLocalToGlobalMapping BtoNmap;
166   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
167   const PetscScalar      *array,*row_cmat_values;
168   const PetscInt         *row_cmat_indices,*idx_R_local;
169   PetscInt               *vertices,*idx_V_B,*auxindices;
170   PetscInt               n_vertices,n_constraints,size_of_constraint;
171   PetscInt               i,j,n_R,n_D,n_B;
172   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
173   /* Vector and matrix types */
174   VecType                impVecType;
175   MatType                impMatType;
176   /* some shortcuts to scalars */
177   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
178   /* for debugging purposes */
179   PetscReal              *coarsefunctions_errors,*constraints_errors;
180 
181   PetscFunctionBegin;
182   /* get number of vertices and their local indices */
183   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
184   n_constraints = pcbddc->local_primal_size-n_vertices;
185   /* Set Non-overlapping dimensions */
186   n_B = pcis->n_B; n_D = pcis->n - n_B;
187   n_R = pcis->n-n_vertices;
188 
189   /* Set types for local objects needed by BDDC precondtioner */
190   impMatType = MATSEQDENSE;
191   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
192 
193   /* Allocating some extra storage just to be safe */
194   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
195   for (i=0;i<pcis->n;i++) auxindices[i]=i;
196 
197   /* vertices in boundary numbering */
198   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
199   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
200   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
201   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
202   if (i != n_vertices) {
203     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
204   }
205 
206   /* some work vectors on vertices and/or constraints */
207   if (n_vertices) {
208     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
209     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
210     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
211     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
212   }
213   if (n_constraints) {
214     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
215     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
216   }
217 
218   /* Precompute stuffs needed for preprocessing and application of BDDC*/
219   if (n_constraints) {
220     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
221     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
222     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
223     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
224 
225     /* Extract constraints on R nodes: C_{CR}  */
226     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
227     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
228     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
229 
230     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
231     for (i=0;i<n_constraints;i++) {
232       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
233       /* Get row of constraint matrix in R numbering */
234       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
235       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
237       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
238       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
239       /* Solve for row of constraint matrix in R numbering */
240       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
241       /* Set values in local_auxmat2 */
242       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
243       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
244       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
245     }
246     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
247     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
248     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
249 
250     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
251     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
252     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
253     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
254     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
255     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
256     ierr = MatSetUp(M1);CHKERRQ(ierr);
257     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
258     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
259     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
260     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
261     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
262     ierr = MatDestroy(&M2);CHKERRQ(ierr);
263     ierr = MatDestroy(&M3);CHKERRQ(ierr);
264     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
265     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
266   }
267 
268   /* Get submatrices from subdomain matrix */
269   if (n_vertices) {
270     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
271     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
272     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
273     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
274     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
275   }
276 
277   /* Matrix of coarse basis functions (local) */
278   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
279   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
280   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
281   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
282   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
283     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
284     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
285     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
286     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
287   }
288 
289   if (pcbddc->dbg_flag) {
290     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
291     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
292     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
293   }
294   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
295   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
296 
297   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
298 
299   /* vertices */
300   for (i=0;i<n_vertices;i++) {
301     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
302     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
303     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
304     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
305     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
306     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
307     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
308     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
309     if (n_constraints) {
310       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
311       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
313     }
314     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
315     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
316 
317     /* Set values in coarse basis function and subdomain part of coarse_mat */
318     /* coarse basis functions */
319     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
320     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
322     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
323     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
324     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
325     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
326     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
327       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
328       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
329       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
330       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
331       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
332     }
333     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
334     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
335     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
336     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
337     if (n_constraints) {
338       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
339       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
340       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
341     }
342 
343     /* check */
344     if (pcbddc->dbg_flag) {
345       /* assemble subdomain vector on local nodes */
346       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
350       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
351       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
352       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
353       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
354       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
355       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
356       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
357       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
358       if (n_constraints) {
359         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
360         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
361         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
362       }
363       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
364       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
365       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
366       /* check saddle point solution */
367       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
368       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
369       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
370       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
371       /* shift by the identity matrix */
372       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
373       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
374       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
376     }
377   }
378 
379   /* constraints */
380   for (i=0;i<n_constraints;i++) {
381     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
382     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
383     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
384     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
385     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
386     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
387     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
388     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
389     if (n_vertices) {
390       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
391     }
392     /* Set values in coarse basis function and subdomain part of coarse_mat */
393     /* coarse basis functions */
394     j = i+n_vertices; /* don't touch this! */
395     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
396     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
397     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
398     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
399     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
400     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
401     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
402       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
403       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
404       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
406       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
407     }
408     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
409     if (n_vertices) {
410       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
411       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
412       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
413     }
414     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
415     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
416     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
417 
418     if (pcbddc->dbg_flag) {
419       /* assemble subdomain vector on nodes */
420       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
421       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       if (n_vertices) {
429         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
430         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
431         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
432       }
433       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
434       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
435       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
449     }
450   }
451   /* call assembling routines for local coarse basis */
452   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
453   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
454   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
455     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
456     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   }
458 
459   /* compute other basis functions for non-symmetric problems */
460   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
461   if (!setsym || (setsym && !issym)) {
462     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
463     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
464     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
465     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
466     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
467       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
468       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
469       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
470       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
471     }
472     for (i=0;i<pcbddc->local_primal_size;i++) {
473       if (n_constraints) {
474         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
475         for (j=0;j<n_constraints;j++) {
476           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
477         }
478         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
479         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
480       }
481       if (i<n_vertices) {
482         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
483         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
484         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
485         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
486         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
487         if (n_constraints) {
488           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
489         }
490       } else {
491         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
492       }
493       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
494       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
495       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
496       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
498       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
499       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
500       if (i<n_vertices) {
501         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
502       }
503       if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
504         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
505         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
507         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
509       }
510 
511       if (pcbddc->dbg_flag) {
512         /* assemble subdomain vector on nodes */
513         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
514         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
515         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
516         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
517         if (i<n_vertices) {
518           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
519         }
520         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
521         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
522         /* assemble subdomain vector of lagrange multipliers */
523         for (j=0;j<pcbddc->local_primal_size;j++) {
524           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
525         }
526         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
527         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
528         /* check saddle point solution */
529         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
530         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
531         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
532         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
533         /* shift by the identity matrix */
534         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
535         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
536         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
537         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
538       }
539     }
540     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
541     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
542     if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
543       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
544       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     }
546   }
547   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
548   /* Checking coarse_sub_mat and coarse basis functios */
549   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
550   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
551   if (pcbddc->dbg_flag) {
552     Mat         coarse_sub_mat;
553     Mat         AUXMAT,TM1,TM2,TM3,TM4;
554     Mat         coarse_phi_D,coarse_phi_B;
555     Mat         coarse_psi_D,coarse_psi_B;
556     Mat         A_II,A_BB,A_IB,A_BI;
557     MatType     checkmattype=MATSEQAIJ;
558     PetscReal   real_value;
559 
560     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
561     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
562     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
563     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
564     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
565     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
566     if (pcbddc->coarse_psi_B) {
567       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
568       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
569     }
570     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
571 
572     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
573     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
574     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
575     if (pcbddc->coarse_psi_B) {
576       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
577       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
578       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
579       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
580       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
581       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
582       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
583       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
584       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
585       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
586       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
587       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
588     } else {
589       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
590       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
591       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
592       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
593       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
594       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
595       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
596       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
597     }
598     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
599     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
600     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
601     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
602     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
604     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
605     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
606     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
607     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
608     for (i=0;i<pcbddc->local_primal_size;i++) {
609       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
610     }
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
614     }
615     if (pcbddc->coarse_psi_B) {
616       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
617       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
618         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
619       }
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
623       }
624     }
625     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
626     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
627     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
628     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
629     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
630     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
631     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
632     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
633     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
634     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
635     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
636     if (pcbddc->coarse_psi_B) {
637       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
638       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
639     }
640     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
641     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
642     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
643     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
644   }
645   /* free memory */
646   if (n_vertices) {
647     ierr = PetscFree(vertices);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
649     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
650     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
651     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
652     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
653   }
654   if (n_constraints) {
655     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
656     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
657     ierr = MatDestroy(&M1);CHKERRQ(ierr);
658     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
659   }
660   ierr = PetscFree(auxindices);CHKERRQ(ierr);
661   /* create coarse matrix and data structures for message passing associated actual choice of coarse problem type */
662   ierr = PCBDDCSetUpCoarseEnvironment(pc,coarse_submat_vals);CHKERRQ(ierr);
663   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
669 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
670 {
671   PC_IS*            pcis = (PC_IS*)(pc->data);
672   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
673   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
674   /* manage repeated solves */
675   MatReuse          reuse;
676   MatStructure      matstruct;
677   PetscErrorCode    ierr;
678 
679   PetscFunctionBegin;
680   /* get mat flags */
681   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
682   reuse = MAT_INITIAL_MATRIX;
683   if (pc->setupcalled) {
684     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
685     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
686     if (matstruct == SAME_NONZERO_PATTERN) {
687       reuse = MAT_REUSE_MATRIX;
688     } else {
689       reuse = MAT_INITIAL_MATRIX;
690     }
691   }
692   if (reuse == MAT_INITIAL_MATRIX) {
693     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
694     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
695     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
696     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
698   }
699 
700   /* transform local matrices if needed */
701   if (pcbddc->use_change_of_basis) {
702     Mat         change_mat_all;
703     PetscScalar *row_cmat_values;
704     PetscInt    *row_cmat_indices;
705     PetscInt    *nnz,*is_indices,*temp_indices;
706     PetscInt    i,j,k,n_D,n_B;
707 
708     /* Get Non-overlapping dimensions */
709     n_B = pcis->n_B;
710     n_D = pcis->n-n_B;
711 
712     /* compute nonzero structure of change of basis on all local nodes */
713     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
714     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
715     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
716     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     k=1;
719     for (i=0;i<n_B;i++) {
720       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
721       nnz[is_indices[i]]=j;
722       if (k < j) k = j;
723       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724     }
725     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     /* assemble change of basis matrix on the whole set of local dofs */
727     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
728     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
729     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
730     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
731     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
732     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
733     for (i=0;i<n_D;i++) {
734       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
735     }
736     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
737     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
738     for (i=0;i<n_B;i++) {
739       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
740       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
741       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
742       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743     }
744     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
745     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
746     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
747     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
748     if (i==1) {
749       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
750     } else {
751       Mat work_mat;
752       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
753       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
754       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
755     }
756     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
757     ierr = PetscFree(nnz);CHKERRQ(ierr);
758     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
759   } else {
760     /* without change of basis, the local matrix is unchanged */
761     if (!pcbddc->local_mat) {
762       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
763       pcbddc->local_mat = matis->A;
764     }
765   }
766 
767   /* get submatrices */
768   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
769   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
770   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 
775 #undef __FUNCT__
776 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
777 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
778 {
779   PC_IS*         pcis = (PC_IS*)(pc->data);
780   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
781   IS             is_aux1,is_aux2;
782   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
783   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
784   PetscBool      *array_bool;
785   PetscErrorCode ierr;
786 
787   PetscFunctionBegin;
788   /* Set Non-overlapping dimensions */
789   n_B = pcis->n_B; n_D = pcis->n - n_B;
790   /* get vertex indices from constraint matrix */
791   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
792   /* Set number of constraints */
793   n_constraints = pcbddc->local_primal_size-n_vertices;
794   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
795   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&array_bool);CHKERRQ(ierr);
796   for (i=0;i<pcis->n;i++) array_bool[i] = PETSC_TRUE;
797   for (i=0;i<n_vertices;i++) array_bool[vertices[i]] = PETSC_FALSE;
798   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
799   for (i=0, n_R=0; i<pcis->n; i++) {
800     if (array_bool[i]) {
801       idx_R_local[n_R] = i;
802       n_R++;
803     }
804   }
805   ierr = PetscFree(vertices);CHKERRQ(ierr);
806   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
807 
808   /* print some info if requested */
809   if (pcbddc->dbg_flag) {
810     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
811     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
812     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
813     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
814     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
815     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817   }
818 
819   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
820   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
821   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
822   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
823   for (i=0; i<n_D; i++) array_bool[is_indices[i]] = PETSC_FALSE;
824   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
825   for (i=0, j=0; i<n_R; i++) {
826     if (array_bool[idx_R_local[i]]) {
827       aux_array1[j] = i;
828       j++;
829     }
830   }
831   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
832   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   for (i=0, j=0; i<n_B; i++) {
834     if (array_bool[is_indices[i]]) {
835       aux_array2[j] = i; j++;
836     }
837   }
838   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
840   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
841   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
842   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
843 
844   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
845     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
846     for (i=0, j=0; i<n_R; i++) {
847       if (!array_bool[idx_R_local[i]]) {
848         aux_array1[j] = i;
849         j++;
850       }
851     }
852     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
853     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
854     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
855   }
856   ierr = PetscFree(array_bool);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
862 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
863 {
864   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
865 
866   PetscFunctionBegin;
867   pcbddc->use_exact_dirichlet=use;
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
873 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
874 {
875   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
876   PC_IS          *pcis = (PC_IS*)pc->data;
877   PC             pc_temp;
878   Mat            A_RR;
879   Vec            vec1,vec2,vec3;
880   MatStructure   matstruct;
881   PetscScalar    m_one = -1.0;
882   PetscReal      value;
883   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887   /* Creating PC contexts for local Dirichlet and Neumann problems */
888   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
889 
890   /* DIRICHLET PROBLEM */
891   /* Matrix for Dirichlet problem is pcis->A_II */
892   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
893   if (!pcbddc->ksp_D) { /* create object if not yet build */
894     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
895     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
896     /* default */
897     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
898     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
899     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
900     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
901     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
902   }
903   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
904   /* Allow user's customization */
905   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
906   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
907   if (!n_D) {
908     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
909     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
910   }
911   /* Set Up KSP for Dirichlet problem of BDDC */
912   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
913   /* set ksp_D into pcis data */
914   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
915   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
916   pcis->ksp_D = pcbddc->ksp_D;
917 
918   /* NEUMANN PROBLEM */
919   /* Matrix for Neumann problem is A_RR -> we need to create it */
920   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
921   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
922   if (!pcbddc->ksp_R) { /* create object if not yet build */
923     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
924     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
925     /* default */
926     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
927     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
928     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
929     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
930     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
931   }
932   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
933   /* Allow user's customization */
934   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
935   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
936   if (!n_R) {
937     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
938     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
939   }
940   /* Set Up KSP for Neumann problem of BDDC */
941   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
942 
943   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
944 
945   /* Dirichlet */
946   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
947   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
948   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
949   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
950   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
951   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
952   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
953   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
954   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
955   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
956   /* need to be adapted? */
957   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
958   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
959   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
960   /* print info */
961   if (pcbddc->dbg_flag) {
962     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
963     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
964     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
965     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
966     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
967   }
968   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
969     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
970   }
971 
972   /* Neumann */
973   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
974   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
975   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
976   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
977   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
978   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
979   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
980   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
981   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
982   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
983   /* need to be adapted? */
984   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
985   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
986   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
987   /* print info */
988   if (pcbddc->dbg_flag) {
989     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
990     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
991   }
992   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
993     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
994   }
995 
996   /* free Neumann problem's matrix */
997   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1003 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1004 {
1005   PetscErrorCode ierr;
1006   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1007 
1008   PetscFunctionBegin;
1009   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1010   if (pcbddc->local_auxmat1) {
1011     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1012     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1019 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1020 {
1021   PetscErrorCode ierr;
1022   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1023   PC_IS*            pcis = (PC_IS*)  (pc->data);
1024   const PetscScalar zero = 0.0;
1025 
1026   PetscFunctionBegin;
1027   /* Application of PHI^T (or PSI^T)  */
1028   if (pcbddc->coarse_psi_B) {
1029     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1030     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1031   } else {
1032     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1033     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1034   }
1035   /* Scatter data of coarse_rhs */
1036   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1037   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038 
1039   /* Local solution on R nodes */
1040   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1041   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1042   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1043   if (pcbddc->inexact_prec_type) {
1044     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1045     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1046   }
1047   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1048   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1049   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   if (pcbddc->inexact_prec_type) {
1052     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054   }
1055 
1056   /* Coarse solution */
1057   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1059     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1060   }
1061   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1062   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1063 
1064   /* Sum contributions from two levels */
1065   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1066   if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1072 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1073 {
1074   PetscErrorCode ierr;
1075   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1076 
1077   PetscFunctionBegin;
1078   switch (pcbddc->coarse_communications_type) {
1079     case SCATTERS_BDDC:
1080       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1081       break;
1082     case GATHERS_BDDC:
1083       break;
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1090 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1091 {
1092   PetscErrorCode ierr;
1093   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1094   PetscScalar*   array_to;
1095   PetscScalar*   array_from;
1096   MPI_Comm       comm;
1097   PetscInt       i;
1098 
1099   PetscFunctionBegin;
1100   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1101   switch (pcbddc->coarse_communications_type) {
1102     case SCATTERS_BDDC:
1103       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1104       break;
1105     case GATHERS_BDDC:
1106       if (vec_from) {
1107         ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr);
1108       }
1109       if (vec_to) {
1110         ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr);
1111       }
1112       switch(pcbddc->coarse_problem_type){
1113         case SEQUENTIAL_BDDC:
1114           if (smode == SCATTER_FORWARD) {
1115             ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1116             if (vec_to) {
1117               if (imode == ADD_VALUES) {
1118                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1119                   array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1120                 }
1121               } else {
1122                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1123                   array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1124                 }
1125               }
1126             }
1127           } else {
1128             if (vec_from) {
1129               if (imode == ADD_VALUES) {
1130                 MPI_Comm vec_from_comm;
1131                 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr);
1132                 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type);
1133               }
1134               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1135                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
1136               }
1137             }
1138             ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1139           }
1140           break;
1141         case REPLICATED_BDDC:
1142           if (smode == SCATTER_FORWARD) {
1143             ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
1144             if (imode == ADD_VALUES) {
1145               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1146                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1147               }
1148             } else {
1149               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1150                 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1151               }
1152             }
1153           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
1154             if (imode == ADD_VALUES) {
1155               for (i=0;i<pcbddc->local_primal_size;i++) {
1156                 array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
1157               }
1158             } else {
1159               for (i=0;i<pcbddc->local_primal_size;i++) {
1160                 array_to[i]=array_from[pcbddc->local_primal_indices[i]];
1161               }
1162             }
1163           }
1164           break;
1165         case MULTILEVEL_BDDC:
1166           break;
1167         case PARALLEL_BDDC:
1168           break;
1169       }
1170       if (vec_from) {
1171         ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr);
1172       }
1173       if (vec_to) {
1174         ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr);
1175       }
1176       break;
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 /* uncomment for testing purposes */
1182 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1183 #undef __FUNCT__
1184 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1185 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1186 {
1187   PetscErrorCode    ierr;
1188   PC_IS*            pcis = (PC_IS*)(pc->data);
1189   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1190   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1191   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1192   MatType           impMatType=MATSEQAIJ;
1193   /* one and zero */
1194   PetscScalar       one=1.0,zero=0.0;
1195   /* space to store constraints and their local indices */
1196   PetscScalar       *temp_quadrature_constraint;
1197   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1198   /* iterators */
1199   PetscInt          i,j,k,total_counts,temp_start_ptr;
1200   /* stuff to store connected components stored in pcbddc->mat_graph */
1201   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1202   PetscInt          n_ISForFaces,n_ISForEdges;
1203   PetscBool         get_faces,get_edges,get_vertices;
1204   /* near null space stuff */
1205   MatNullSpace      nearnullsp;
1206   const Vec         *nearnullvecs;
1207   Vec               *localnearnullsp;
1208   PetscBool         nnsp_has_cnst;
1209   PetscInt          nnsp_size;
1210   PetscScalar       *array;
1211   /* BLAS integers */
1212   PetscBLASInt      lwork,lierr;
1213   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1214   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1215   /* LAPACK working arrays for SVD or POD */
1216   PetscBool         skip_lapack;
1217   PetscScalar       *work;
1218   PetscReal         *singular_vals;
1219 #if defined(PETSC_USE_COMPLEX)
1220   PetscReal         *rwork;
1221 #endif
1222 #if defined(PETSC_MISSING_LAPACK_GESVD)
1223   PetscBLASInt      Blas_one_2=1;
1224   PetscScalar       *temp_basis,*correlation_mat;
1225 #else
1226   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1227   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1228 #endif
1229   /* change of basis */
1230   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1231   PetscBool         boolforchange,*change_basis,*touched;
1232   /* auxiliary stuff */
1233   PetscInt          *nnz,*is_indices,*local_to_B;
1234   /* some quantities */
1235   PetscInt          n_vertices,total_primal_vertices;
1236   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1237 
1238 
1239   PetscFunctionBegin;
1240   /* Get index sets for faces, edges and vertices from graph */
1241   get_faces = PETSC_TRUE;
1242   get_edges = PETSC_TRUE;
1243   get_vertices = PETSC_TRUE;
1244   if (pcbddc->vertices_flag) {
1245     get_faces = PETSC_FALSE;
1246     get_edges = PETSC_FALSE;
1247   }
1248   if (pcbddc->constraints_flag) {
1249     get_vertices = PETSC_FALSE;
1250   }
1251   if (pcbddc->faces_flag) {
1252     get_edges = PETSC_FALSE;
1253   }
1254   if (pcbddc->edges_flag) {
1255     get_faces = PETSC_FALSE;
1256   }
1257   /* default */
1258   if (!get_faces && !get_edges && !get_vertices) {
1259     get_vertices = PETSC_TRUE;
1260   }
1261   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1262   /* print some info */
1263   if (pcbddc->dbg_flag) {
1264     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1265     i = 0;
1266     if (ISForVertices) {
1267       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1268     }
1269     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1270     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1271     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1272     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1273   }
1274   /* check if near null space is attached to global mat */
1275   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1276   if (nearnullsp) {
1277     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1278   } else { /* if near null space is not provided BDDC uses constants by default */
1279     nnsp_size = 0;
1280     nnsp_has_cnst = PETSC_TRUE;
1281   }
1282   /* get max number of constraints on a single cc */
1283   max_constraints = nnsp_size;
1284   if (nnsp_has_cnst) max_constraints++;
1285 
1286   /*
1287        Evaluate maximum storage size needed by the procedure
1288        - temp_indices will contain start index of each constraint stored as follows
1289        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1290        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1291        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1292                                                                                                                                                          */
1293   total_counts = n_ISForFaces+n_ISForEdges;
1294   total_counts *= max_constraints;
1295   n_vertices = 0;
1296   if (ISForVertices) {
1297     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1298   }
1299   total_counts += n_vertices;
1300   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1301   ierr = PetscMalloc((total_counts+1)*sizeof(PetscBool),&change_basis);CHKERRQ(ierr);
1302   total_counts = 0;
1303   max_size_of_constraint = 0;
1304   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1305     if (i<n_ISForEdges) {
1306       used_IS = &ISForEdges[i];
1307     } else {
1308       used_IS = &ISForFaces[i-n_ISForEdges];
1309     }
1310     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1311     total_counts += j;
1312     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1313   }
1314   total_counts *= max_constraints;
1315   total_counts += n_vertices;
1316   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1317   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1318   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1319   /* local to boundary numbering */
1320   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1321   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1322   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1323   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1324   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1325   /* get local part of global near null space vectors */
1326   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1327   for (k=0;k<nnsp_size;k++) {
1328     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1329     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1331   }
1332 
1333   /* whether or not to skip lapack calls */
1334   skip_lapack = PETSC_TRUE;
1335   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1336 
1337   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1338   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1339     PetscScalar temp_work;
1340 #if defined(PETSC_MISSING_LAPACK_GESVD)
1341     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1342     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1343     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1344     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1345 #if defined(PETSC_USE_COMPLEX)
1346     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1347 #endif
1348     /* now we evaluate the optimal workspace using query with lwork=-1 */
1349     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1350     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1351     lwork = -1;
1352     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1353 #if !defined(PETSC_USE_COMPLEX)
1354     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1355 #else
1356     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1357 #endif
1358     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1359     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1360 #else /* on missing GESVD */
1361     /* SVD */
1362     PetscInt max_n,min_n;
1363     max_n = max_size_of_constraint;
1364     min_n = max_constraints;
1365     if (max_size_of_constraint < max_constraints) {
1366       min_n = max_size_of_constraint;
1367       max_n = max_constraints;
1368     }
1369     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1370 #if defined(PETSC_USE_COMPLEX)
1371     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1372 #endif
1373     /* now we evaluate the optimal workspace using query with lwork=-1 */
1374     lwork = -1;
1375     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1376     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1377     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1378     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1379 #if !defined(PETSC_USE_COMPLEX)
1380     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1381 #else
1382     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1383 #endif
1384     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1385     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1386 #endif /* on missing GESVD */
1387     /* Allocate optimal workspace */
1388     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1389     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1390   }
1391   /* Now we can loop on constraining sets */
1392   total_counts = 0;
1393   temp_indices[0] = 0;
1394   /* vertices */
1395   if (ISForVertices) {
1396     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1397     if (nnsp_has_cnst) { /* consider all vertices */
1398       for (i=0;i<n_vertices;i++) {
1399         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1400         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1401         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1402         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1403         change_basis[total_counts]=PETSC_FALSE;
1404         total_counts++;
1405       }
1406     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1407       PetscBool used_vertex;
1408       for (i=0;i<n_vertices;i++) {
1409         used_vertex = PETSC_FALSE;
1410         k = 0;
1411         while (!used_vertex && k<nnsp_size) {
1412           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1413           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1414             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1415             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1416             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1417             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1418             change_basis[total_counts]=PETSC_FALSE;
1419             total_counts++;
1420             used_vertex = PETSC_TRUE;
1421           }
1422           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1423           k++;
1424         }
1425       }
1426     }
1427     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1428     n_vertices = total_counts;
1429   }
1430 
1431   /* edges and faces */
1432   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1433     if (i<n_ISForEdges) {
1434       used_IS = &ISForEdges[i];
1435       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1436     } else {
1437       used_IS = &ISForFaces[i-n_ISForEdges];
1438       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1439     }
1440     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1441     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1442     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1443     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1444     /* change of basis should not be performed on local periodic nodes */
1445     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1446     if (nnsp_has_cnst) {
1447       PetscScalar quad_value;
1448       temp_constraints++;
1449       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1450       for (j=0;j<size_of_constraint;j++) {
1451         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1452         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1453         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1454       }
1455       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1456       change_basis[total_counts]=boolforchange;
1457       total_counts++;
1458     }
1459     for (k=0;k<nnsp_size;k++) {
1460       PetscReal real_value;
1461       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1462       for (j=0;j<size_of_constraint;j++) {
1463         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1464         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1465         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1466       }
1467       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1468       /* check if array is null on the connected component */
1469       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1470       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1471       if (real_value > 0.0) { /* keep indices and values */
1472         temp_constraints++;
1473         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1474         change_basis[total_counts]=boolforchange;
1475         total_counts++;
1476       }
1477     }
1478     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1479     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1480     if (!pcbddc->use_nnsp_true) {
1481       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1482 
1483 #if defined(PETSC_MISSING_LAPACK_GESVD)
1484       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1485          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1486          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1487             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1488             from that computed using LAPACKgesvd
1489          -> This is due to a different computation of eigenvectors in LAPACKheev
1490          -> The quality of the POD-computed basis will be the same */
1491       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1492       /* Store upper triangular part of correlation matrix */
1493       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1494       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1495       for (j=0;j<temp_constraints;j++) {
1496         for (k=0;k<j+1;k++) {
1497           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1498         }
1499       }
1500       /* compute eigenvalues and eigenvectors of correlation matrix */
1501       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1502       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1503 #if !defined(PETSC_USE_COMPLEX)
1504       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1505 #else
1506       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1507 #endif
1508       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1509       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1510       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1511       j=0;
1512       while (j < temp_constraints && singular_vals[j] < tol) j++;
1513       total_counts=total_counts-j;
1514       /* scale and copy POD basis into used quadrature memory */
1515       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1516       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1517       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1518       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1519       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1520       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1521       if (j<temp_constraints) {
1522         PetscInt ii;
1523         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1524         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1525         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1526         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1527         for (k=0;k<temp_constraints-j;k++) {
1528           for (ii=0;ii<size_of_constraint;ii++) {
1529             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1530           }
1531         }
1532       }
1533 #else  /* on missing GESVD */
1534       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1535       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1536       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1537       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1538 #if !defined(PETSC_USE_COMPLEX)
1539       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1540 #else
1541       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1542 #endif
1543       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1544       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1545       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1546       k = temp_constraints;
1547       if (k > size_of_constraint) k = size_of_constraint;
1548       j = 0;
1549       while (j < k && singular_vals[k-j-1] < tol) j++;
1550       total_counts = total_counts-temp_constraints+k-j;
1551 #endif /* on missing GESVD */
1552     }
1553   }
1554   /* free index sets of faces, edges and vertices */
1555   for (i=0;i<n_ISForFaces;i++) {
1556     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1557   }
1558   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1559   for (i=0;i<n_ISForEdges;i++) {
1560     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1561   }
1562   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1563   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1564 
1565   /* free workspace */
1566   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1567     ierr = PetscFree(work);CHKERRQ(ierr);
1568 #if defined(PETSC_USE_COMPLEX)
1569     ierr = PetscFree(rwork);CHKERRQ(ierr);
1570 #endif
1571     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1572 #if defined(PETSC_MISSING_LAPACK_GESVD)
1573     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1574     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1575 #endif
1576   }
1577   for (k=0;k<nnsp_size;k++) {
1578     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1579   }
1580   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1581 
1582   /* set quantities in pcbddc data structure */
1583   /* n_vertices defines the number of subdomain corners in the primal space */
1584   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1585   pcbddc->local_primal_size = total_counts;
1586   pcbddc->n_vertices = n_vertices;
1587   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1588 
1589   /* Create constraint matrix */
1590   /* The constraint matrix is used to compute the l2g map of primal dofs */
1591   /* so we need to set it up properly either with or without change of basis */
1592   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1593   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1594   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1595   /* array to compute a local numbering of constraints : vertices first then constraints */
1596   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1597   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1598   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1599   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1600   /* auxiliary stuff for basis change */
1601   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1602   ierr = PetscMalloc(pcis->n_B*sizeof(PetscBool),&touched);CHKERRQ(ierr);
1603   ierr = PetscMemzero(touched,pcis->n_B*sizeof(PetscBool));CHKERRQ(ierr);
1604 
1605   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1606   total_primal_vertices=0;
1607   for (i=0;i<pcbddc->local_primal_size;i++) {
1608     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1609     if (size_of_constraint == 1) {
1610       touched[temp_indices_to_constraint_B[temp_indices[i]]]=PETSC_TRUE;
1611       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1612       aux_primal_minloc[total_primal_vertices]=0;
1613       total_primal_vertices++;
1614     } else if (change_basis[i]) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1615       PetscInt min_loc,min_index;
1616       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1617       /* find first untouched local node */
1618       k = 0;
1619       while (touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) k++;
1620       min_index = global_indices[k];
1621       min_loc = k;
1622       /* search the minimum among global nodes already untouched on the cc */
1623       for (k=1;k<size_of_constraint;k++) {
1624         /* there can be more than one constraint on a single connected component */
1625         if (min_index > global_indices[k] && !touched[temp_indices_to_constraint_B[temp_indices[i]+k]]) {
1626           min_index = global_indices[k];
1627           min_loc = k;
1628         }
1629       }
1630       touched[temp_indices_to_constraint_B[temp_indices[i]+min_loc]] = PETSC_TRUE;
1631       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1632       aux_primal_minloc[total_primal_vertices]=min_loc;
1633       total_primal_vertices++;
1634     }
1635   }
1636   /* free workspace */
1637   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1638   ierr = PetscFree(touched);CHKERRQ(ierr);
1639   /* permute indices in order to have a sorted set of vertices */
1640   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1641 
1642   /* nonzero structure of constraint matrix */
1643   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1644   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1645   j=total_primal_vertices;
1646   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1647     if (!change_basis[i]) {
1648       nnz[j]=temp_indices[i+1]-temp_indices[i];
1649       j++;
1650     }
1651   }
1652   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1653   ierr = PetscFree(nnz);CHKERRQ(ierr);
1654   /* set values in constraint matrix */
1655   for (i=0;i<total_primal_vertices;i++) {
1656     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1657   }
1658   total_counts = total_primal_vertices;
1659   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1660     if (!change_basis[i]) {
1661       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1662       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1663       total_counts++;
1664     }
1665   }
1666   /* assembling */
1667   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   /*
1670   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1671   */
1672   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1673   if (pcbddc->use_change_of_basis) {
1674     PetscBool qr_needed = PETSC_FALSE;
1675     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1676     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1677     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1678     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1679     /* work arrays */
1680     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1681     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1682     /* nonzeros per row */
1683     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1684       if (change_basis[i]) {
1685         qr_needed = PETSC_TRUE;
1686         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1687         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1688       }
1689     }
1690     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1691     ierr = PetscFree(nnz);CHKERRQ(ierr);
1692     /* Set initial identity in the matrix */
1693     for (i=0;i<pcis->n_B;i++) {
1694       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1695     }
1696 
1697     /* Now we loop on the constraints which need a change of basis */
1698     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1699     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1700     /* Change of basis matrix T computed via QR decomposition of constraints */
1701     if (qr_needed) {
1702       /* dual and primal dofs on a single cc */
1703       PetscInt     dual_dofs,primal_dofs;
1704       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1705       PetscInt     primal_counter;
1706       /* working stuff for GEQRF */
1707       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1708       PetscBLASInt lqr_work;
1709       /* working stuff for UNGQR */
1710       PetscScalar  *gqr_work,lgqr_work_t;
1711       PetscBLASInt lgqr_work;
1712       /* working stuff for TRTRS */
1713       PetscScalar  *trs_rhs;
1714       PetscBLASInt Blas_NRHS;
1715       /* pointers for values insertion into change of basis matrix */
1716       PetscInt     *start_rows,*start_cols;
1717       PetscScalar  *start_vals;
1718       /* working stuff for values insertion */
1719       PetscBool    *is_primal;
1720 
1721       /* space to store Q */
1722       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1723       /* first we issue queries for optimal work */
1724       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1725       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1726       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1727       lqr_work = -1;
1728       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1729       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1730       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1731       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1732       lgqr_work = -1;
1733       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1734       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1735       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1736       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1737       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1738       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1739       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1740       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1741       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1742       /* array to store scaling factors for reflectors */
1743       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1744       /* array to store rhs and solution of triangular solver */
1745       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1746       /* array to store whether a node is primal or not */
1747       ierr = PetscMalloc(pcis->n_B*sizeof(*is_primal),&is_primal);CHKERRQ(ierr);
1748       ierr = PetscMemzero(is_primal,pcis->n_B*sizeof(*is_primal));CHKERRQ(ierr);
1749       for (i=0;i<total_primal_vertices;i++) is_primal[local_to_B[aux_primal_numbering[i]]] = PETSC_TRUE;
1750 
1751       /* allocating workspace for check */
1752       if (pcbddc->dbg_flag) {
1753         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1754         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1755         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1756       }
1757 
1758       /* loop on constraints and see whether or not they need a change of basis */
1759       /* -> using implicit ordering contained in temp_indices data */
1760       total_counts = pcbddc->n_vertices;
1761       primal_counter = total_counts;
1762       while (total_counts<pcbddc->local_primal_size) {
1763         primal_dofs = 1;
1764         if (change_basis[total_counts]) {
1765           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1766           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1767             primal_dofs++;
1768           }
1769           /* get constraint info */
1770           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1771           dual_dofs = size_of_constraint-primal_dofs;
1772 
1773           /* copy quadrature constraints for change of basis check */
1774           if (pcbddc->dbg_flag) {
1775             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1776             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1777           }
1778 
1779           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1780           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1781 
1782           /* compute QR decomposition of constraints */
1783           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1784           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1785           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1786           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1787           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1788           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1789           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1790 
1791           /* explictly compute R^-T */
1792           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1793           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1794           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1795           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1796           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1797           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1798           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1799           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1800           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1801           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1802 
1803           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1804           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1805           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1806           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1807           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1808           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1809           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1810           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1811           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1812 
1813           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1814              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1815              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1816           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1817           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1818           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1819           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1820           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1821           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1822           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1823           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1824           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1825           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1826 
1827           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1828           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1829           /* insert cols for primal dofs */
1830           for (j=0;j<primal_dofs;j++) {
1831             start_vals = &qr_basis[j*size_of_constraint];
1832             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1833             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1834           }
1835           /* insert cols for dual dofs */
1836           for (j=0,k=0;j<dual_dofs;k++) {
1837             if (!is_primal[temp_indices_to_constraint_B[temp_indices[total_counts]+k]]) {
1838               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1839               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1840               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1841               j++;
1842             }
1843           }
1844 
1845           /* check change of basis */
1846           if (pcbddc->dbg_flag) {
1847             PetscInt   ii,jj;
1848             PetscBool valid_qr=PETSC_TRUE;
1849             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1850             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1851             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1852             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1853             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1854             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1855             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1856             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1857             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1858             for (jj=0;jj<size_of_constraint;jj++) {
1859               for (ii=0;ii<primal_dofs;ii++) {
1860                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1861                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1862               }
1863             }
1864             if (!valid_qr) {
1865               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1866               for (jj=0;jj<size_of_constraint;jj++) {
1867                 for (ii=0;ii<primal_dofs;ii++) {
1868                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1869                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1870                   }
1871                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1872                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1873                   }
1874                 }
1875               }
1876             } else {
1877               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1878             }
1879           }
1880           /* increment primal counter */
1881           primal_counter += primal_dofs;
1882         } else {
1883           if (pcbddc->dbg_flag) {
1884             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1885           }
1886         }
1887         /* increment constraint counter total_counts */
1888         total_counts += primal_dofs;
1889       }
1890       if (pcbddc->dbg_flag) {
1891         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1892         ierr = PetscFree(work);CHKERRQ(ierr);
1893       }
1894       /* free workspace */
1895       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1896       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1897       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1898       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1899       ierr = PetscFree(is_primal);CHKERRQ(ierr);
1900       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1901     }
1902     /* assembling */
1903     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905     /*
1906     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1907     */
1908   }
1909   /* free workspace */
1910   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1911   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1912   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1913   ierr = PetscFree(change_basis);CHKERRQ(ierr);
1914   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1915   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1916   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1917   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1923 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1924 {
1925   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1926   PC_IS       *pcis = (PC_IS*)pc->data;
1927   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1928   PetscInt    bs,ierr,i,vertex_size;
1929   PetscViewer viewer=pcbddc->dbg_viewer;
1930 
1931   PetscFunctionBegin;
1932   /* Init local Graph struct */
1933   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1934 
1935   /* Check validity of the csr graph passed in by the user */
1936   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1937     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1938   }
1939   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1940   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1941     Mat mat_adj;
1942     const PetscInt *xadj,*adjncy;
1943     PetscBool flg_row=PETSC_TRUE;
1944 
1945     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1946     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1947     if (!flg_row) {
1948       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1949     }
1950     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1951     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1952     if (!flg_row) {
1953       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1954     }
1955     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1956   }
1957 
1958   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1959   vertex_size = 1;
1960   if (!pcbddc->n_ISForDofs) {
1961     IS *custom_ISForDofs;
1962 
1963     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1964     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1965     for (i=0;i<bs;i++) {
1966       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1967     }
1968     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1969     /* remove my references to IS objects */
1970     for (i=0;i<bs;i++) {
1971       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1972     }
1973     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1974   } else { /* mat block size as vertex size (used for elasticity) */
1975     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1976   }
1977 
1978   /* Setup of Graph */
1979   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1980 
1981   /* Graph's connected components analysis */
1982   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1983 
1984   /* print some info to stdout */
1985   if (pcbddc->dbg_flag) {
1986     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1993 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[])
1994 {
1995   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1996   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1997   PetscErrorCode ierr;
1998 
1999   PetscFunctionBegin;
2000   n = 0;
2001   vertices = 0;
2002   if (pcbddc->ConstraintMatrix) {
2003     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2004     for (i=0;i<local_primal_size;i++) {
2005       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2006       if (size_of_constraint == 1) n++;
2007       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2008     }
2009     if (vertices_idx) {
2010       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2011       n = 0;
2012       for (i=0;i<local_primal_size;i++) {
2013         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2014         if (size_of_constraint == 1) {
2015           vertices[n++]=row_cmat_indices[0];
2016         }
2017         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2018       }
2019     }
2020   }
2021   *n_vertices = n;
2022   if (vertices_idx) *vertices_idx = vertices;
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2028 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[])
2029 {
2030   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2031   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2032   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2033   PetscBool      *touched;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   n = 0;
2038   constraints_index = 0;
2039   if (pcbddc->ConstraintMatrix) {
2040     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2041     max_size_of_constraint = 0;
2042     for (i=0;i<local_primal_size;i++) {
2043       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2044       if (size_of_constraint > 1) {
2045         n++;
2046       }
2047       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2048       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2049     }
2050     if (constraints_idx) {
2051       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2052       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2053       ierr = PetscMalloc(local_size*sizeof(PetscBool),&touched);CHKERRQ(ierr);
2054       ierr = PetscMemzero(touched,local_size*sizeof(PetscBool));CHKERRQ(ierr);
2055       n = 0;
2056       for (i=0;i<local_primal_size;i++) {
2057         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2058         if (size_of_constraint > 1) {
2059           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2060           /* find first untouched local node */
2061           j = 0;
2062           while(touched[row_cmat_indices[j]]) j++;
2063           min_index = row_cmat_global_indices[j];
2064           min_loc = j;
2065           /* search the minimum among nodes not yet touched on the connected component
2066              since there can be more than one constraint on a single cc */
2067           for (j=1;j<size_of_constraint;j++) {
2068             if (min_index > row_cmat_global_indices[j] && !touched[row_cmat_indices[j]]) {
2069               min_index = row_cmat_global_indices[j];
2070               min_loc = j;
2071             }
2072           }
2073           touched[row_cmat_indices[min_loc]] = PETSC_TRUE;
2074           constraints_index[n++] = row_cmat_indices[min_loc];
2075         }
2076         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2077       }
2078       ierr = PetscFree(touched);CHKERRQ(ierr);
2079       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2080     }
2081   }
2082   *n_constraints = n;
2083   if (constraints_idx) *constraints_idx = constraints_index;
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 /* the next two functions has been adapted from pcis.c */
2088 #undef __FUNCT__
2089 #define __FUNCT__ "PCBDDCApplySchur"
2090 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2091 {
2092   PetscErrorCode ierr;
2093   PC_IS          *pcis = (PC_IS*)(pc->data);
2094 
2095   PetscFunctionBegin;
2096   if (!vec2_B) { vec2_B = v; }
2097   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2098   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2099   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2100   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2101   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2107 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2108 {
2109   PetscErrorCode ierr;
2110   PC_IS          *pcis = (PC_IS*)(pc->data);
2111 
2112   PetscFunctionBegin;
2113   if (!vec2_B) { vec2_B = v; }
2114   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2115   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2116   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2117   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2118   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "PCBDDCSubsetNumbering"
2124 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2125 {
2126   Vec            local_vec,global_vec;
2127   IS             seqis,paris;
2128   VecScatter     scatter_ctx;
2129   PetscScalar    *array;
2130   PetscInt       *temp_global_dofs;
2131   PetscScalar    globalsum;
2132   PetscInt       i,j,s;
2133   PetscInt       nlocals,first_index,old_index,max_local;
2134   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2135   PetscMPIInt    *dof_sizes,*dof_displs;
2136   PetscBool      first_found;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   /* mpi buffers */
2141   MPI_Comm_size(comm,&size_prec_comm);
2142   MPI_Comm_rank(comm,&rank_prec_comm);
2143   j = ( !rank_prec_comm ? size_prec_comm : 0);
2144   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2145   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2146   /* get maximum size of subset */
2147   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2148   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2149   max_local = 0;
2150   if (n_local_dofs) {
2151     max_local = temp_global_dofs[0];
2152     for (i=1;i<n_local_dofs;i++) {
2153       if (max_local < temp_global_dofs[i] ) {
2154         max_local = temp_global_dofs[i];
2155       }
2156     }
2157   }
2158   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2159   max_global++;
2160   max_local = 0;
2161   if (n_local_dofs) {
2162     max_local = local_dofs[0];
2163     for (i=1;i<n_local_dofs;i++) {
2164       if (max_local < local_dofs[i] ) {
2165         max_local = local_dofs[i];
2166       }
2167     }
2168   }
2169   max_local++;
2170   /* allocate workspace */
2171   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2172   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2173   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2174   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2175   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2176   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2177   /* create scatter */
2178   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2179   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2180   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2181   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2182   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2183   /* init array */
2184   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2185   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2186   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2187   if (local_dofs_mult) {
2188     for (i=0;i<n_local_dofs;i++) {
2189       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2190     }
2191   } else {
2192     for (i=0;i<n_local_dofs;i++) {
2193       array[local_dofs[i]]=1.0;
2194     }
2195   }
2196   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2197   /* scatter into global vec and get total number of global dofs */
2198   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2199   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2200   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2201   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2202   /* Fill global_vec with cumulative function for global numbering */
2203   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2204   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2205   nlocals = 0;
2206   first_index = -1;
2207   first_found = PETSC_FALSE;
2208   for (i=0;i<s;i++) {
2209     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2210       first_found = PETSC_TRUE;
2211       first_index = i;
2212     }
2213     nlocals += (PetscInt)PetscRealPart(array[i]);
2214   }
2215   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2216   if (!rank_prec_comm) {
2217     dof_displs[0]=0;
2218     for (i=1;i<size_prec_comm;i++) {
2219       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2220     }
2221   }
2222   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2223   if (first_found) {
2224     array[first_index] += (PetscScalar)nlocals;
2225     old_index = first_index;
2226     for (i=first_index+1;i<s;i++) {
2227       if (PetscRealPart(array[i]) > 0.0) {
2228         array[i] += array[old_index];
2229         old_index = i;
2230       }
2231     }
2232   }
2233   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2234   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2235   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2236   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2237   /* get global ordering of local dofs */
2238   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2239   if (local_dofs_mult) {
2240     for (i=0;i<n_local_dofs;i++) {
2241       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2242     }
2243   } else {
2244     for (i=0;i<n_local_dofs;i++) {
2245       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2246     }
2247   }
2248   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2249   /* free workspace */
2250   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2251   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2252   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2253   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2254   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2255   /* return pointer to global ordering of local dofs */
2256   *global_numbering_subset = temp_global_dofs;
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2262 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2263 {
2264   PetscInt       i,j;
2265   PetscScalar    *alphas;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   /* this implements stabilized Gram-Schmidt */
2270   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2271   for (i=0;i<n;i++) {
2272     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2273     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2274     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2275   }
2276   ierr = PetscFree(alphas);CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 /* BDDC requires metis 5.0.1 for multilevel */
2281 #if defined(PETSC_HAVE_METIS)
2282 #include "metis.h"
2283 #define MetisInt    idx_t
2284 #define MetisScalar real_t
2285 #endif
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "PCBDDCSetUpCoarseEnvironment"
2289 PetscErrorCode PCBDDCSetUpCoarseEnvironment(PC pc,PetscScalar* coarse_submat_vals)
2290 {
2291   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
2292   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2293   PC_IS     *pcis     = (PC_IS*)pc->data;
2294   MPI_Comm  prec_comm;
2295   MPI_Comm  coarse_comm;
2296 
2297   PetscInt  coarse_size;
2298 
2299   MatNullSpace CoarseNullSpace;
2300 
2301   /* common to all choiches */
2302   PetscScalar *temp_coarse_mat_vals;
2303   PetscScalar *ins_coarse_mat_vals;
2304   PetscInt    *ins_local_primal_indices;
2305   PetscMPIInt *localsizes2,*localdispl2;
2306   PetscMPIInt size_prec_comm;
2307   PetscMPIInt rank_prec_comm;
2308   PetscMPIInt active_rank=MPI_PROC_NULL;
2309   PetscMPIInt master_proc=0;
2310   PetscInt    ins_local_primal_size;
2311   /* specific to MULTILEVEL_BDDC */
2312   PetscMPIInt *ranks_recv=0;
2313   PetscMPIInt count_recv=0;
2314   PetscMPIInt rank_coarse_proc_send_to=-1;
2315   PetscMPIInt coarse_color = MPI_UNDEFINED;
2316   ISLocalToGlobalMapping coarse_ISLG;
2317   /* some other variables */
2318   PetscErrorCode ierr;
2319   MatType coarse_mat_type;
2320   PCType  coarse_pc_type;
2321   KSPType coarse_ksp_type;
2322   PC pc_temp;
2323   PetscInt i,j,k;
2324   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2325   /* verbose output viewer */
2326   PetscViewer viewer=pcbddc->dbg_viewer;
2327   PetscInt    dbg_flag=pcbddc->dbg_flag;
2328 
2329   PetscInt      offset,offset2;
2330   PetscMPIInt   im_active,active_procs;
2331   PetscInt      *dnz,*onz;
2332 
2333   PetscBool     setsym,issym=PETSC_FALSE;
2334 
2335   PetscFunctionBegin;
2336   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2337   ins_local_primal_indices = 0;
2338   ins_coarse_mat_vals      = 0;
2339   localsizes2              = 0;
2340   localdispl2              = 0;
2341   temp_coarse_mat_vals     = 0;
2342   coarse_ISLG              = 0;
2343 
2344   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2345   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2346   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2347 
2348   /* Assign global numbering to coarse dofs */
2349   {
2350     PetscInt     *auxlocal_primal,*aux_idx;
2351     PetscMPIInt  mpi_local_primal_size;
2352     PetscScalar  coarsesum,*array;
2353 
2354     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
2355 
2356     /* Construct needed data structures for message passing */
2357     j = 0;
2358     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2359       j = size_prec_comm;
2360     }
2361     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
2362     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2363     /* Gather local_primal_size information for all processes  */
2364     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2365       ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2366     } else {
2367       ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2368     }
2369     pcbddc->replicated_primal_size = 0;
2370     for (i=0; i<j; i++) {
2371       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
2372       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
2373     }
2374 
2375     /* This code fragment assumes that the number of local constraints per connected component
2376        is not greater than the number of nodes defined for the connected component
2377        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2378     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2379     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2380     ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2381     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2382     ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2383     ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2384     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2385     /* Compute number of coarse dofs */
2386     ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr);
2387 
2388     if (dbg_flag) {
2389       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2390       ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2391       ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr);
2392       ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2393       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2394       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
2395       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2396       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2397       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2398       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2399       ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2400       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2401       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2402       for (i=0;i<pcis->n;i++) {
2403         if (array[i] == 1.0) {
2404           ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr);
2405           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr);
2406         }
2407       }
2408       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2409       for (i=0;i<pcis->n;i++) {
2410         if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2411       }
2412       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2413       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2414       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2415       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2416       ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2417       ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr);
2418       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2419     }
2420     ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
2421   }
2422 
2423   if (dbg_flag) {
2424     ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr);
2425     if (dbg_flag > 1) {
2426       ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2427       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2428       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2429       for (i=0;i<pcbddc->local_primal_size;i++) {
2430         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
2431       }
2432       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2433     }
2434   }
2435 
2436   im_active = 0;
2437   if (pcis->n) im_active = 1;
2438   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2439 
2440   /* adapt coarse problem type */
2441 #if defined(PETSC_HAVE_METIS)
2442   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2443     if (pcbddc->current_level < pcbddc->max_levels) {
2444       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2445         if (dbg_flag) {
2446           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2447          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2448         }
2449         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2450       }
2451     } else {
2452       if (dbg_flag) {
2453         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2454         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2455       }
2456       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2457     }
2458   }
2459 #else
2460   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2461 #endif
2462 
2463   switch(pcbddc->coarse_problem_type){
2464 
2465     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2466     {
2467 #if defined(PETSC_HAVE_METIS)
2468       /* we need additional variables */
2469       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2470       MetisInt    *metis_coarse_subdivision;
2471       MetisInt    options[METIS_NOPTIONS];
2472       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2473       PetscMPIInt procs_jumps_coarse_comm;
2474       PetscMPIInt *coarse_subdivision;
2475       PetscMPIInt *total_count_recv;
2476       PetscMPIInt *total_ranks_recv;
2477       PetscMPIInt *displacements_recv;
2478       PetscMPIInt *my_faces_connectivity;
2479       PetscMPIInt *petsc_faces_adjncy;
2480       MetisInt    *faces_adjncy;
2481       MetisInt    *faces_xadj;
2482       PetscMPIInt *number_of_faces;
2483       PetscMPIInt *faces_displacements;
2484       PetscInt    *array_int;
2485       PetscMPIInt my_faces=0;
2486       PetscMPIInt total_faces=0;
2487       PetscInt    ranks_stretching_ratio;
2488 
2489       /* define some quantities */
2490       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2491       coarse_mat_type = MATIS;
2492       coarse_pc_type  = PCBDDC;
2493       coarse_ksp_type = KSPRICHARDSON;
2494 
2495       /* details of coarse decomposition */
2496       n_subdomains = active_procs;
2497       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2498       ranks_stretching_ratio = size_prec_comm/active_procs;
2499       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2500 
2501 #if 0
2502       PetscMPIInt *old_ranks;
2503       PetscInt    *new_ranks,*jj,*ii;
2504       MatPartitioning mat_part;
2505       IS coarse_new_decomposition,is_numbering;
2506       PetscViewer viewer_test;
2507       MPI_Comm    test_coarse_comm;
2508       PetscMPIInt test_coarse_color;
2509       Mat         mat_adj;
2510       /* Create new communicator for coarse problem splitting the old one */
2511       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2512          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2513       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2514       test_coarse_comm = MPI_COMM_NULL;
2515       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2516       if (im_active) {
2517         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2518         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2519         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2520         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2521         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2522         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2523         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2524         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2525         k = pcis->n_neigh-1;
2526         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2527         ii[0]=0;
2528         ii[1]=k;
2529         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2530         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2531         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2532         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2533         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2534         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2535         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2536         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2537         printf("Setting Nparts %d\n",n_parts);
2538         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2539         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2540         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2541         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2542         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2543         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2544         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2545         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2546         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2547         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2548         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2549         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2550         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2551       }
2552 #endif
2553 
2554       /* build CSR graph of subdomains' connectivity */
2555       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2556       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2557       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2558         for (j=0;j<pcis->n_shared[i];j++){
2559           array_int[ pcis->shared[i][j] ]+=1;
2560         }
2561       }
2562       for (i=1;i<pcis->n_neigh;i++){
2563         for (j=0;j<pcis->n_shared[i];j++){
2564           if (array_int[ pcis->shared[i][j] ] > 0 ){
2565             my_faces++;
2566             break;
2567           }
2568         }
2569       }
2570 
2571       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2572       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2573       my_faces=0;
2574       for (i=1;i<pcis->n_neigh;i++){
2575         for (j=0;j<pcis->n_shared[i];j++){
2576           if (array_int[ pcis->shared[i][j] ] > 0 ){
2577             my_faces_connectivity[my_faces]=pcis->neigh[i];
2578             my_faces++;
2579             break;
2580           }
2581         }
2582       }
2583       if (rank_prec_comm == master_proc) {
2584         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2585         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2586         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2587         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2588         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2589       }
2590       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2591       if (rank_prec_comm == master_proc) {
2592         faces_xadj[0]=0;
2593         faces_displacements[0]=0;
2594         j=0;
2595         for (i=1;i<size_prec_comm+1;i++) {
2596           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2597           if (number_of_faces[i-1]) {
2598             j++;
2599             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2600           }
2601         }
2602       }
2603       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2604       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2605       ierr = PetscFree(array_int);CHKERRQ(ierr);
2606       if (rank_prec_comm == master_proc) {
2607         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2608         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2609         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2610         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2611       }
2612 
2613       if ( rank_prec_comm == master_proc ) {
2614 
2615         PetscInt heuristic_for_metis=3;
2616 
2617         ncon=1;
2618         faces_nvtxs=n_subdomains;
2619         /* partition graoh induced by face connectivity */
2620         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2621         ierr = METIS_SetDefaultOptions(options);
2622         /* we need a contiguous partition of the coarse mesh */
2623         options[METIS_OPTION_CONTIG]=1;
2624         options[METIS_OPTION_NITER]=30;
2625         if (pcbddc->coarsening_ratio > 1) {
2626           if (n_subdomains>n_parts*heuristic_for_metis) {
2627             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2628             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2629             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2630             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2631           } else {
2632             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2633             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2634           }
2635         } else {
2636           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2637         }
2638         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2639         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2640         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2641 
2642         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2643         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2644         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2645         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2646       }
2647 
2648       /* Create new communicator for coarse problem splitting the old one */
2649       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2650         coarse_color=0;              /* for communicator splitting */
2651         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2652       }
2653       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2654          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2655       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2656 
2657       if ( coarse_color == 0 ) {
2658         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2659         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2660       } else {
2661         rank_coarse_comm = MPI_PROC_NULL;
2662       }
2663 
2664       /* master proc take care of arranging and distributing coarse information */
2665       if (rank_coarse_comm == master_proc) {
2666         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2667         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2668         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2669         /* some initializations */
2670         displacements_recv[0]=0;
2671         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2672         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2673         for (j=0;j<size_coarse_comm;j++) {
2674           for (i=0;i<size_prec_comm;i++) {
2675           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2676           }
2677         }
2678         /* displacements needed for scatterv of total_ranks_recv */
2679       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2680 
2681         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2682         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2683         for (j=0;j<size_coarse_comm;j++) {
2684           for (i=0;i<size_prec_comm;i++) {
2685             if (coarse_subdivision[i]==j) {
2686               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2687               total_count_recv[j]+=1;
2688             }
2689           }
2690         }
2691         /*for (j=0;j<size_coarse_comm;j++) {
2692           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2693           for (i=0;i<total_count_recv[j];i++) {
2694             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2695           }
2696           printf("\n");
2697         }*/
2698 
2699         /* identify new decomposition in terms of ranks in the old communicator */
2700         for (i=0;i<n_subdomains;i++) {
2701           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2702         }
2703         /*printf("coarse_subdivision in old end new ranks\n");
2704         for (i=0;i<size_prec_comm;i++)
2705           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2706             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2707           } else {
2708             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2709           }
2710         printf("\n");*/
2711       }
2712 
2713       /* Scatter new decomposition for send details */
2714       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2715       /* Scatter receiving details to members of coarse decomposition */
2716       if ( coarse_color == 0) {
2717         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2718         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2719         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2720       }
2721 
2722       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2723       if (coarse_color == 0) {
2724         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2725         for (i=0;i<count_recv;i++)
2726           printf("%d ",ranks_recv[i]);
2727         printf("\n");
2728       }*/
2729 
2730       if (rank_prec_comm == master_proc) {
2731         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2732         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2733         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2734         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2735       }
2736 #endif
2737       break;
2738     }
2739 
2740     case(REPLICATED_BDDC):
2741 
2742       pcbddc->coarse_communications_type = GATHERS_BDDC;
2743       coarse_mat_type = MATSEQAIJ;
2744       coarse_pc_type  = PCLU;
2745       coarse_ksp_type  = KSPPREONLY;
2746       coarse_comm = PETSC_COMM_SELF;
2747       active_rank = rank_prec_comm;
2748       break;
2749 
2750     case(PARALLEL_BDDC):
2751 
2752       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2753       coarse_mat_type = MATAIJ;
2754       coarse_pc_type  = PCREDUNDANT;
2755       coarse_ksp_type  = KSPPREONLY;
2756       coarse_comm = prec_comm;
2757       active_rank = rank_prec_comm;
2758       break;
2759 
2760     case(SEQUENTIAL_BDDC):
2761       pcbddc->coarse_communications_type = GATHERS_BDDC;
2762       coarse_mat_type = MATAIJ;
2763       coarse_pc_type = PCLU;
2764       coarse_ksp_type  = KSPPREONLY;
2765       coarse_comm = PETSC_COMM_SELF;
2766       active_rank = master_proc;
2767       break;
2768   }
2769 
2770   switch(pcbddc->coarse_communications_type){
2771 
2772     case(SCATTERS_BDDC):
2773       {
2774         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2775 
2776           IS coarse_IS;
2777 
2778           if(pcbddc->coarsening_ratio == 1) {
2779             ins_local_primal_size = pcbddc->local_primal_size;
2780             ins_local_primal_indices = pcbddc->local_primal_indices;
2781             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2782             /* nonzeros */
2783             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2784             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2785             for (i=0;i<ins_local_primal_size;i++) {
2786               dnz[i] = ins_local_primal_size;
2787             }
2788           } else {
2789             PetscMPIInt send_size;
2790             PetscMPIInt *send_buffer;
2791             PetscInt    *aux_ins_indices;
2792             PetscInt    ii,jj;
2793             MPI_Request *requests;
2794 
2795             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2796             /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
2797             ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
2798             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2799             pcbddc->replicated_primal_size = count_recv;
2800             j = 0;
2801             for (i=0;i<count_recv;i++) {
2802               pcbddc->local_primal_displacements[i] = j;
2803               j += pcbddc->local_primal_sizes[ranks_recv[i]];
2804             }
2805             pcbddc->local_primal_displacements[count_recv] = j;
2806             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2807             /* allocate auxiliary space */
2808             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2809             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2810             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2811             /* allocate stuffs for message massing */
2812             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2813             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2814             /* send indices to be inserted */
2815             for (i=0;i<count_recv;i++) {
2816               send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
2817               ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2818             }
2819             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2820               send_size = pcbddc->local_primal_size;
2821               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2822               for (i=0;i<send_size;i++) {
2823                 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2824               }
2825               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2826             }
2827             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2828             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2829               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2830             }
2831             j = 0;
2832             for (i=0;i<count_recv;i++) {
2833               ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
2834               localsizes2[i] = ii*ii;
2835               localdispl2[i] = j;
2836               j += localsizes2[i];
2837               jj = pcbddc->local_primal_displacements[i];
2838               /* it counts the coarse subdomains sharing the coarse node */
2839               for (k=0;k<ii;k++) {
2840                 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
2841               }
2842             }
2843             /* temp_coarse_mat_vals used to store matrix values to be received */
2844             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2845             /* evaluate how many values I will insert in coarse mat */
2846             ins_local_primal_size = 0;
2847             for (i=0;i<coarse_size;i++) {
2848               if (aux_ins_indices[i]) {
2849                 ins_local_primal_size++;
2850               }
2851             }
2852             /* evaluate indices I will insert in coarse mat */
2853             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2854             j = 0;
2855             for(i=0;i<coarse_size;i++) {
2856               if(aux_ins_indices[i]) {
2857                 ins_local_primal_indices[j] = i;
2858                 j++;
2859               }
2860             }
2861             /* processes partecipating in coarse problem receive matrix data from their friends */
2862             for (i=0;i<count_recv;i++) {
2863               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2864             }
2865             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2866               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2867               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2868             }
2869             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2870             /* nonzeros */
2871             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2872             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2873             /* use aux_ins_indices to realize a global to local mapping */
2874             j=0;
2875             for(i=0;i<coarse_size;i++){
2876               if(aux_ins_indices[i]==0){
2877                 aux_ins_indices[i]=-1;
2878               } else {
2879                 aux_ins_indices[i]=j;
2880                 j++;
2881               }
2882             }
2883             for (i=0;i<count_recv;i++) {
2884               j = pcbddc->local_primal_sizes[ranks_recv[i]];
2885               for (k=0;k<j;k++) {
2886                 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
2887               }
2888             }
2889             /* check */
2890             for (i=0;i<ins_local_primal_size;i++) {
2891               if (dnz[i] > ins_local_primal_size) {
2892                 dnz[i] = ins_local_primal_size;
2893               }
2894             }
2895             ierr = PetscFree(requests);CHKERRQ(ierr);
2896             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2897             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2898           }
2899           /* create local to global mapping needed by coarse MATIS */
2900           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2901           coarse_comm = prec_comm;
2902           active_rank = rank_prec_comm;
2903           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2904           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2905           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2906         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2907           /* arrays for values insertion */
2908           ins_local_primal_size = pcbddc->local_primal_size;
2909           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2910           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2911           for (j=0;j<ins_local_primal_size;j++){
2912             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2913             for (i=0;i<ins_local_primal_size;i++) {
2914               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2915             }
2916           }
2917         }
2918         break;
2919 
2920     }
2921 
2922     case(GATHERS_BDDC):
2923       {
2924 
2925         PetscMPIInt mysize,mysize2;
2926         PetscMPIInt *send_buffer;
2927 
2928         if (rank_prec_comm==active_rank) {
2929           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2930           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
2931           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2932           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2933           /* arrays for values insertion */
2934       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2935           localdispl2[0]=0;
2936       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2937           j=0;
2938       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2939           ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2940         }
2941 
2942         mysize=pcbddc->local_primal_size;
2943         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2944         ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2945     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2946 
2947         if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2948           ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2949           ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
2950         } else {
2951           ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
2952           ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
2953         }
2954         ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2955         break;
2956       }/* switch on coarse problem and communications associated with finished */
2957   }
2958 
2959   /* Now create and fill up coarse matrix */
2960   if ( rank_prec_comm == active_rank ) {
2961 
2962     Mat matis_coarse_local_mat;
2963 
2964     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2965       ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
2966       ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2967       ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2968       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2969       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2970       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2971       ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2972       ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2973     } else {
2974       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
2975       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2976       ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2977       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2978       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2979       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2980       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2981       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2982     }
2983     /* preallocation */
2984     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2985 
2986       PetscInt lrows,lcols,bs;
2987 
2988       ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2989       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2990       ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr);
2991 
2992       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2993 
2994         Vec         vec_dnz,vec_onz;
2995         PetscScalar *my_dnz,*my_onz,*array;
2996         PetscInt    *mat_ranges,*row_ownership;
2997         PetscInt    coarse_index_row,coarse_index_col,owner;
2998 
2999         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
3000         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
3001         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
3002         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
3003         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
3004 
3005         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
3006         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
3007         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3008         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3009 
3010         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
3011         ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
3012         for (i=0;i<size_prec_comm;i++) {
3013           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
3014             row_ownership[j]=i;
3015           }
3016         }
3017 
3018         for (i=0;i<pcbddc->local_primal_size;i++) {
3019           coarse_index_row = pcbddc->local_primal_indices[i];
3020           owner = row_ownership[coarse_index_row];
3021           for (j=i;j<pcbddc->local_primal_size;j++) {
3022             owner = row_ownership[coarse_index_row];
3023             coarse_index_col = pcbddc->local_primal_indices[j];
3024             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3025               my_dnz[i] += 1.0;
3026             } else {
3027               my_onz[i] += 1.0;
3028             }
3029             if (i != j) {
3030               owner = row_ownership[coarse_index_col];
3031               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3032                 my_dnz[j] += 1.0;
3033               } else {
3034                 my_onz[j] += 1.0;
3035               }
3036             }
3037           }
3038         }
3039         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3040         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3041         if (pcbddc->local_primal_size) {
3042           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3043           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3044         }
3045         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3046         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3047         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3048         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3049         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3050         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3051         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3052 
3053         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3054         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3055         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3056 
3057         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3058         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3059         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3060         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3061         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3062         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3063       } else {
3064         for (k=0;k<size_prec_comm;k++){
3065           offset=pcbddc->local_primal_displacements[k];
3066           offset2=localdispl2[k];
3067           ins_local_primal_size = pcbddc->local_primal_sizes[k];
3068           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3069           for (j=0;j<ins_local_primal_size;j++){
3070             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3071           }
3072           for (j=0;j<ins_local_primal_size;j++) {
3073             ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr);
3074           }
3075           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3076         }
3077       }
3078 
3079       /* check */
3080       for (i=0;i<lrows;i++) {
3081         if (dnz[i]>lcols) dnz[i]=lcols;
3082         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
3083       }
3084       ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr);
3085       ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3086       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3087     } else {
3088       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3089       ierr = PetscFree(dnz);CHKERRQ(ierr);
3090     }
3091     /* insert values */
3092     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3093       ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3094     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3095       if (pcbddc->coarsening_ratio == 1) {
3096         ins_coarse_mat_vals = coarse_submat_vals;
3097         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3098       } else {
3099         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3100         for (k=0;k<pcbddc->replicated_primal_size;k++) {
3101           offset = pcbddc->local_primal_displacements[k];
3102           offset2 = localdispl2[k];
3103           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
3104           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3105           for (j=0;j<ins_local_primal_size;j++){
3106             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3107           }
3108           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3109           ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3110           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3111         }
3112       }
3113       ins_local_primal_indices = 0;
3114       ins_coarse_mat_vals = 0;
3115     } else {
3116       for (k=0;k<size_prec_comm;k++){
3117         offset=pcbddc->local_primal_displacements[k];
3118         offset2=localdispl2[k];
3119         ins_local_primal_size = pcbddc->local_primal_sizes[k];
3120         ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3121         for (j=0;j<ins_local_primal_size;j++){
3122           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3123         }
3124         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3125         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3126         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3127       }
3128       ins_local_primal_indices = 0;
3129       ins_coarse_mat_vals = 0;
3130     }
3131     ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3132     ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133     /* symmetry of coarse matrix */
3134     if (issym) {
3135       ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3136     }
3137     ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3138   }
3139 
3140   /* create loc to glob scatters if needed */
3141   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3142      IS local_IS,global_IS;
3143      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3144      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3145      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3146      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3147      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3148   }
3149 
3150   /* free memory no longer needed */
3151   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3152   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3153   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3154   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3155   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3156   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3157 
3158   /* Compute coarse null space */
3159   CoarseNullSpace = 0;
3160   if (pcbddc->NullSpace) {
3161     ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr);
3162   }
3163 
3164   /* KSP for coarse problem */
3165   if (rank_prec_comm == active_rank) {
3166     PetscBool isbddc=PETSC_FALSE;
3167 
3168     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3169     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3170     ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3171     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3172     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3173     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3174     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3175     /* Allow user's customization */
3176     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3177     /* Set Up PC for coarse problem BDDC */
3178     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3179       i = pcbddc->current_level+1;
3180       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3181       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3182       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3183       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3184       if (CoarseNullSpace) {
3185         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3186       }
3187       if (dbg_flag) {
3188         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3189         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3190       }
3191     } else {
3192       if (CoarseNullSpace) {
3193         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3194       }
3195     }
3196     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3197     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3198 
3199     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3200     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3201     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3202     if (j == 1) {
3203       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3204       if (isbddc) {
3205         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3206       }
3207     }
3208   }
3209   /* Check coarse problem if requested */
3210   if ( dbg_flag && rank_prec_comm == active_rank ) {
3211     KSP check_ksp;
3212     PC  check_pc;
3213     Vec check_vec;
3214     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3215     KSPType check_ksp_type;
3216 
3217     /* Create ksp object suitable for extreme eigenvalues' estimation */
3218     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3219     ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3220     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3221     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3222       if (issym) check_ksp_type = KSPCG;
3223       else check_ksp_type = KSPGMRES;
3224       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3225     } else {
3226       check_ksp_type = KSPPREONLY;
3227     }
3228     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3229     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3230     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3231     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3232     /* create random vec */
3233     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3234     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3235     if (CoarseNullSpace) {
3236       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3237     }
3238     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3239     /* solve coarse problem */
3240     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3241     if (CoarseNullSpace) {
3242       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3243     }
3244     /* check coarse problem residual error */
3245     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3246     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3247     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3248     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3249     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3250     /* get eigenvalue estimation if inexact */
3251     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3252       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3253       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3254       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3255       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3256     }
3257     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3258     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3259     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3260   }
3261   if (dbg_flag) {
3262     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3263   }
3264   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3265 
3266   PetscFunctionReturn(0);
3267 }
3268 
3269