xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8629588b77265804a2d0a4b5f5e07d5e1abdef9d)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_mat);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
106   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
108   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
110   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
113   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
114   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
117   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
118   ierr = PetscFree(pcbddc->local_primal_indices);CHKERRQ(ierr);
119   ierr = PetscFree(pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
120   ierr = PetscFree(pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
121   ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
122   ierr = PetscFree(pcbddc->local_primal_sizes);CHKERRQ(ierr);
123   PetscFunctionReturn(0);
124 }
125 
126 #undef __FUNCT__
127 #define __FUNCT__ "PCBDDCCreateWorkVectors"
128 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
129 {
130   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
131   PC_IS          *pcis = (PC_IS*)pc->data;
132   VecType        impVecType;
133   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
134   PetscErrorCode ierr;
135 
136   PetscFunctionBegin;
137   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
138   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
139   local_primal_size = n_constraints+n_vertices;
140   n_R = pcis->n-n_vertices;
141   /* local work vectors */
142   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
143   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
144   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
145   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
146   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
147   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
148   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
149   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
150   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
151   if (n_constraints) {
152     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
153     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
154     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
155   }
156   PetscFunctionReturn(0);
157 }
158 
159 #undef __FUNCT__
160 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
161 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
162 {
163   PetscErrorCode         ierr;
164   /* pointers to pcis and pcbddc */
165   PC_IS*                 pcis = (PC_IS*)pc->data;
166   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
167   /* submatrices of local problem */
168   Mat                    A_RV,A_VR,A_VV;
169   /* working matrices */
170   Mat                    M1,M2,M3,C_CR;
171   /* working vectors */
172   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
173   /* additional working stuff */
174   IS                     is_aux;
175   ISLocalToGlobalMapping BtoNmap;
176   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
177   const PetscScalar      *array,*row_cmat_values;
178   const PetscInt         *row_cmat_indices,*idx_R_local;
179   PetscInt               *vertices,*idx_V_B,*auxindices;
180   PetscInt               n_vertices,n_constraints,size_of_constraint;
181   PetscInt               i,j,n_R,n_D,n_B;
182   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
183   /* Vector and matrix types */
184   VecType                impVecType;
185   MatType                impMatType;
186   /* some shortcuts to scalars */
187   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
188   /* for debugging purposes */
189   PetscReal              *coarsefunctions_errors,*constraints_errors;
190 
191   PetscFunctionBegin;
192   /* get number of vertices and their local indices */
193   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
194   n_constraints = pcbddc->local_primal_size-n_vertices;
195   /* Set Non-overlapping dimensions */
196   n_B = pcis->n_B; n_D = pcis->n - n_B;
197   n_R = pcis->n-n_vertices;
198 
199   /* Set types for local objects needed by BDDC precondtioner */
200   impMatType = MATSEQDENSE;
201   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
202 
203   /* Allocating some extra storage just to be safe */
204   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
205   for (i=0;i<pcis->n;i++) auxindices[i]=i;
206 
207   /* vertices in boundary numbering */
208   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
209   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
210   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
211   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
212   if (i != n_vertices) {
213     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
214   }
215 
216   /* some work vectors on vertices and/or constraints */
217   if (n_vertices) {
218     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
219     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
220     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
221     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
222   }
223   if (n_constraints) {
224     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
225     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
226   }
227 
228   /* Precompute stuffs needed for preprocessing and application of BDDC*/
229   if (n_constraints) {
230     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
231     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
232     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
233     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
234 
235     /* Extract constraints on R nodes: C_{CR}  */
236     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
237     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
238     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
239 
240     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
241     for (i=0;i<n_constraints;i++) {
242       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
243       /* Get row of constraint matrix in R numbering */
244       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
245       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
246       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
247       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
248       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
249       /* Solve for row of constraint matrix in R numbering */
250       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
251       /* Set values in local_auxmat2 */
252       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
253       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
254       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
255     }
256     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
257     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
258     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
259 
260     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
261     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
262     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
263     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
264     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
265     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
266     ierr = MatSetUp(M1);CHKERRQ(ierr);
267     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
268     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
269     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
270     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
271     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
272     ierr = MatDestroy(&M2);CHKERRQ(ierr);
273     ierr = MatDestroy(&M3);CHKERRQ(ierr);
274     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
275     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
276   }
277 
278   /* Get submatrices from subdomain matrix */
279   if (n_vertices) {
280     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
281     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
282     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
283     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
284     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
285   }
286 
287   /* Matrix of coarse basis functions (local) */
288   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
289   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
290   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
291   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
292   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
293     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
294     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
295     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
296     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
297   }
298 
299   if (pcbddc->dbg_flag) {
300     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
301     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
302     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
303   }
304   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
305   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
306 
307   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
308 
309   /* vertices */
310   for (i=0;i<n_vertices;i++) {
311     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
312     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
313     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
314     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
315     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
316     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
317     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
318     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
319     if (n_constraints) {
320       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
321       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
322       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
323     }
324     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
325     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
326 
327     /* Set values in coarse basis function and subdomain part of coarse_mat */
328     /* coarse basis functions */
329     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
330     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
331     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
333     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
334     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
335     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
336     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
337       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
338       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
340       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
342     }
343     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
344     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
345     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
346     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
347     if (n_constraints) {
348       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
349       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
350       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
351     }
352 
353     /* check */
354     if (pcbddc->dbg_flag) {
355       /* assemble subdomain vector on local nodes */
356       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
357       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
358       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
359       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
360       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
362       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
363       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
364       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
365       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
366       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
367       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
368       if (n_constraints) {
369         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
370         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
371         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
372       }
373       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
374       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
375       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
376       /* check saddle point solution */
377       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
378       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
379       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
380       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
381       /* shift by the identity matrix */
382       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
383       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
384       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
385       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
386     }
387   }
388 
389   /* constraints */
390   for (i=0;i<n_constraints;i++) {
391     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
392     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
393     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
394     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
395     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
396     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
397     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
398     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
399     if (n_vertices) {
400       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
401     }
402     /* Set values in coarse basis function and subdomain part of coarse_mat */
403     /* coarse basis functions */
404     j = i+n_vertices; /* don't touch this! */
405     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
406     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
409     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
411     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
412       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
413       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
414       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
415       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
417     }
418     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
419     if (n_vertices) {
420       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
421       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
423     }
424     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
425     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
426     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
427 
428     if (pcbddc->dbg_flag) {
429       /* assemble subdomain vector on nodes */
430       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
431       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
432       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
433       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
434       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
435       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
436       /* assemble subdomain vector of lagrange multipliers */
437       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
438       if (n_vertices) {
439         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
440         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
441         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
442       }
443       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
444       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
445       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
449       /* check saddle point solution */
450       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
451       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
452       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
453       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
454       /* shift by the identity matrix */
455       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
456       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
457       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
458       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
459     }
460   }
461   /* call assembling routines for local coarse basis */
462   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
463   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
464   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
465     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
466     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
467   }
468 
469   /* compute other basis functions for non-symmetric problems */
470   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
471   if (!setsym || (setsym && !issym)) {
472     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
473     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
474     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
475     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
476     if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
477       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
478       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
479       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
480       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
481     }
482     for (i=0;i<pcbddc->local_primal_size;i++) {
483       if (n_constraints) {
484         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
485         for (j=0;j<n_constraints;j++) {
486           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
487         }
488         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
490       }
491       if (i<n_vertices) {
492         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
493         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
494         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
495         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
496         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
497         if (n_constraints) {
498           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
499         }
500       } else {
501         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
502       }
503       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
504       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
505       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
506       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
507       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
508       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
509       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
510       if (i<n_vertices) {
511         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
512       }
513       if (pcbddc->inexact_prec_type || pcbddc->dbg_flag) {
514         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
515         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
516         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
517         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
518         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
519       }
520 
521       if (pcbddc->dbg_flag) {
522         /* assemble subdomain vector on nodes */
523         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
524         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
525         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
526         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
527         if (i<n_vertices) {
528           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
532         /* assemble subdomain vector of lagrange multipliers */
533         for (j=0;j<pcbddc->local_primal_size;j++) {
534           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
535         }
536         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
537         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
538         /* check saddle point solution */
539         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
540         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
541         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
543         /* shift by the identity matrix */
544         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
545         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
546         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
547         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
548       }
549     }
550     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     if ( pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
553       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555     }
556   }
557   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
558   /* Checking coarse_sub_mat and coarse basis functios */
559   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
560   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
561   if (pcbddc->dbg_flag) {
562     Mat         coarse_sub_mat;
563     Mat         AUXMAT,TM1,TM2,TM3,TM4;
564     Mat         coarse_phi_D,coarse_phi_B;
565     Mat         coarse_psi_D,coarse_psi_B;
566     Mat         A_II,A_BB,A_IB,A_BI;
567     MatType     checkmattype=MATSEQAIJ;
568     PetscReal   real_value;
569 
570     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
571     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
572     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
573     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
574     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
575     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
576     if (pcbddc->coarse_psi_B) {
577       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
578       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
579     }
580     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
581 
582     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
583     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
584     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
585     if (pcbddc->coarse_psi_B) {
586       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
593       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
594       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
595       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598     } else {
599       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
600       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
601       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
602       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
603       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
604       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
605       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
606       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
607     }
608     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
609     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
610     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
611     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
612     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
613     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
614     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
616     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
617     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
618     for (i=0;i<pcbddc->local_primal_size;i++) {
619       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
620     }
621     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
622     for (i=0;i<pcbddc->local_primal_size;i++) {
623       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
624     }
625     if (pcbddc->coarse_psi_B) {
626       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
627       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
628         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
629       }
630       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
631       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
632         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
633       }
634     }
635     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
636     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
637     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
638     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
639     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
640     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
641     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
642     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
643     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
644     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
645     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
646     if (pcbddc->coarse_psi_B) {
647       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
648       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
649     }
650     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
651     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
652     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
653     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
654   }
655   /* free memory */
656   if (n_vertices) {
657     ierr = PetscFree(vertices);CHKERRQ(ierr);
658     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
659     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
660     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
661     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
662     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
663   }
664   if (n_constraints) {
665     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
666     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
667     ierr = MatDestroy(&M1);CHKERRQ(ierr);
668     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
669   }
670   ierr = PetscFree(auxindices);CHKERRQ(ierr);
671   /* get back data */
672   *coarse_submat_vals_n = coarse_submat_vals;
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
678 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
679 {
680   PC_IS*            pcis = (PC_IS*)(pc->data);
681   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
682   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
683   /* manage repeated solves */
684   MatReuse          reuse;
685   MatStructure      matstruct;
686   PetscErrorCode    ierr;
687 
688   PetscFunctionBegin;
689   /* get mat flags */
690   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
691   reuse = MAT_INITIAL_MATRIX;
692   if (pc->setupcalled) {
693     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
694     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
695     if (matstruct == SAME_NONZERO_PATTERN) {
696       reuse = MAT_REUSE_MATRIX;
697     } else {
698       reuse = MAT_INITIAL_MATRIX;
699     }
700   }
701   if (reuse == MAT_INITIAL_MATRIX) {
702     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
703     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
704     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
705     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
706     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
707   }
708 
709   /* transform local matrices if needed */
710   if (pcbddc->use_change_of_basis) {
711     Mat         change_mat_all;
712     PetscScalar *row_cmat_values;
713     PetscInt    *row_cmat_indices;
714     PetscInt    *nnz,*is_indices,*temp_indices;
715     PetscInt    i,j,k,n_D,n_B;
716 
717     /* Get Non-overlapping dimensions */
718     n_B = pcis->n_B;
719     n_D = pcis->n-n_B;
720 
721     /* compute nonzero structure of change of basis on all local nodes */
722     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
725     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
726     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
727     k=1;
728     for (i=0;i<n_B;i++) {
729       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
730       nnz[is_indices[i]]=j;
731       if (k < j) k = j;
732       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
733     }
734     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
735     /* assemble change of basis matrix on the whole set of local dofs */
736     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
737     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
738     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
739     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
740     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
741     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
742     for (i=0;i<n_D;i++) {
743       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
744     }
745     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
746     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
747     for (i=0;i<n_B;i++) {
748       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
749       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
750       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
751       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
752     }
753     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
754     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
755     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
756     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
757     if (i==1) {
758       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
759     } else {
760       Mat work_mat;
761       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
762       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
763       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
764     }
765     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
766     ierr = PetscFree(nnz);CHKERRQ(ierr);
767     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
768   } else {
769     /* without change of basis, the local matrix is unchanged */
770     if (!pcbddc->local_mat) {
771       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
772       pcbddc->local_mat = matis->A;
773     }
774   }
775 
776   /* get submatrices */
777   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
778   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
779   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
780   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
781   PetscFunctionReturn(0);
782 }
783 
784 #undef __FUNCT__
785 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
786 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
787 {
788   PC_IS*         pcis = (PC_IS*)(pc->data);
789   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
790   IS             is_aux1,is_aux2;
791   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
792   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
793   PetscBT        bitmask;
794   PetscErrorCode ierr;
795 
796   PetscFunctionBegin;
797   /* Set Non-overlapping dimensions */
798   n_B = pcis->n_B; n_D = pcis->n - n_B;
799   /* get vertex indices from constraint matrix */
800   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
801   /* Set number of constraints */
802   n_constraints = pcbddc->local_primal_size-n_vertices;
803   /* create auxiliary bitmask */
804   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
805   for (i=0;i<n_vertices;i++) {
806     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
807   }
808   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
809   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
810   for (i=0, n_R=0; i<pcis->n; i++) {
811     if (!PetscBTLookup(bitmask,i)) {
812       idx_R_local[n_R] = i;
813       n_R++;
814     }
815   }
816   ierr = PetscFree(vertices);CHKERRQ(ierr);
817   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
818 
819   /* print some info if requested */
820   if (pcbddc->dbg_flag) {
821     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
822     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
823     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
824     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
825     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
826     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
827     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
828   }
829 
830   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
831   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
832   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
833   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
834   for (i=0; i<n_D; i++) {
835     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
836   }
837   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
838   for (i=0, j=0; i<n_R; i++) {
839     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
840       aux_array1[j++] = i;
841     }
842   }
843   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
844   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   for (i=0, j=0; i<n_B; i++) {
846     if (!PetscBTLookup(bitmask,is_indices[i])) {
847       aux_array2[j++] = i;
848     }
849   }
850   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
851   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
852   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
853   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
854   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
855 
856   if (pcbddc->inexact_prec_type || pcbddc->dbg_flag ) {
857     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
858     for (i=0, j=0; i<n_R; i++) {
859       if (PetscBTLookup(bitmask,idx_R_local[i])) {
860         aux_array1[j++] = i;
861       }
862     }
863     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
864     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
865     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
866   }
867   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
873 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
874 {
875   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
876 
877   PetscFunctionBegin;
878   pcbddc->use_exact_dirichlet=use;
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
884 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
885 {
886   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
887   PC_IS          *pcis = (PC_IS*)pc->data;
888   PC             pc_temp;
889   Mat            A_RR;
890   Vec            vec1,vec2,vec3;
891   MatStructure   matstruct;
892   PetscScalar    m_one = -1.0;
893   PetscReal      value;
894   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   /* Creating PC contexts for local Dirichlet and Neumann problems */
899   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
900 
901   /* DIRICHLET PROBLEM */
902   /* Matrix for Dirichlet problem is pcis->A_II */
903   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
904   if (!pcbddc->ksp_D) { /* create object if not yet build */
905     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
906     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
907     /* default */
908     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
909     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
910     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
911     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
912     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
913   }
914   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
915   /* Allow user's customization */
916   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
917   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
918   if (!n_D) {
919     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
920     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
921   }
922   /* Set Up KSP for Dirichlet problem of BDDC */
923   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
924   /* set ksp_D into pcis data */
925   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
926   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
927   pcis->ksp_D = pcbddc->ksp_D;
928 
929   /* NEUMANN PROBLEM */
930   /* Matrix for Neumann problem is A_RR -> we need to create it */
931   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
932   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
933   if (!pcbddc->ksp_R) { /* create object if not yet build */
934     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
935     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
936     /* default */
937     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
938     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
939     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
940     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
941     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
942   }
943   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
944   /* Allow user's customization */
945   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
946   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
947   if (!n_R) {
948     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
949     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
950   }
951   /* Set Up KSP for Neumann problem of BDDC */
952   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
953 
954   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
955 
956   /* Dirichlet */
957   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
958   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
959   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
960   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
961   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
962   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
963   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
964   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
965   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
966   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
967   /* need to be adapted? */
968   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
969   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
970   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
971   /* print info */
972   if (pcbddc->dbg_flag) {
973     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
974     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
975     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
976     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
977     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
978   }
979   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->inexact_prec_type) {
980     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
981   }
982 
983   /* Neumann */
984   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
985   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
986   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
987   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
988   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
989   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
990   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
991   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
992   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
993   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
994   /* need to be adapted? */
995   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
996   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
997   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
998   /* print info */
999   if (pcbddc->dbg_flag) {
1000     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
1001     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1002   }
1003   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1004     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1005   }
1006 
1007   /* free Neumann problem's matrix */
1008   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1014 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1015 {
1016   PetscErrorCode ierr;
1017   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1018 
1019   PetscFunctionBegin;
1020   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1021   if (pcbddc->local_auxmat1) {
1022     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1023     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1024   }
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1030 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1031 {
1032   PetscErrorCode ierr;
1033   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1034   PC_IS*            pcis = (PC_IS*)  (pc->data);
1035   const PetscScalar zero = 0.0;
1036 
1037   PetscFunctionBegin;
1038   /* Application of PHI^T (or PSI^T)  */
1039   if (pcbddc->coarse_psi_B) {
1040     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1041     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1042   } else {
1043     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1044     if (pcbddc->inexact_prec_type) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1045   }
1046   /* Scatter data of coarse_rhs */
1047   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1048   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049 
1050   /* Local solution on R nodes */
1051   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1052   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   if (pcbddc->inexact_prec_type) {
1055     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1057   }
1058   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1059   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1060   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   if (pcbddc->inexact_prec_type) {
1063     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1064     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1065   }
1066 
1067   /* Coarse solution */
1068   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1070     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1071   }
1072   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1073   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1074 
1075   /* Sum contributions from two levels */
1076   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1077   if (pcbddc->inexact_prec_type) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1083 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1084 {
1085   PetscErrorCode ierr;
1086   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1087 
1088   PetscFunctionBegin;
1089   switch (pcbddc->coarse_communications_type) {
1090     case SCATTERS_BDDC:
1091       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1092       break;
1093     case GATHERS_BDDC:
1094       break;
1095   }
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1101 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1102 {
1103   PetscErrorCode ierr;
1104   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1105   PetscScalar*   array_to;
1106   PetscScalar*   array_from;
1107   MPI_Comm       comm;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr);
1112   switch (pcbddc->coarse_communications_type) {
1113     case SCATTERS_BDDC:
1114       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1115       break;
1116     case GATHERS_BDDC:
1117       if (vec_from) {
1118         ierr = VecGetArray(vec_from,&array_from);CHKERRQ(ierr);
1119       }
1120       if (vec_to) {
1121         ierr = VecGetArray(vec_to,&array_to);CHKERRQ(ierr);
1122       }
1123       switch(pcbddc->coarse_problem_type){
1124         case SEQUENTIAL_BDDC:
1125           if (smode == SCATTER_FORWARD) {
1126             ierr = MPI_Gatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1127             if (vec_to) {
1128               if (imode == ADD_VALUES) {
1129                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1130                   array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1131                 }
1132               } else {
1133                 for (i=0;i<pcbddc->replicated_primal_size;i++) {
1134                   array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1135                 }
1136               }
1137             }
1138           } else {
1139             if (vec_from) {
1140               if (imode == ADD_VALUES) {
1141                 MPI_Comm vec_from_comm;
1142                 ierr = PetscObjectGetComm((PetscObject)(vec_from),&vec_from_comm);CHKERRQ(ierr);
1143                 SETERRQ2(vec_from_comm,PETSC_ERR_SUP,"Unsupported insert mode ADD_VALUES for SCATTER_REVERSE in %s for case %d\n",__FUNCT__,pcbddc->coarse_problem_type);
1144               }
1145               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1146                 pcbddc->replicated_local_primal_values[i]=array_from[pcbddc->replicated_local_primal_indices[i]];
1147               }
1148             }
1149             ierr = MPI_Scatterv(&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,&array_to[0],pcbddc->local_primal_size,MPIU_SCALAR,0,comm);CHKERRQ(ierr);
1150           }
1151           break;
1152         case REPLICATED_BDDC:
1153           if (smode == SCATTER_FORWARD) {
1154             ierr = MPI_Allgatherv(&array_from[0],pcbddc->local_primal_size,MPIU_SCALAR,&pcbddc->replicated_local_primal_values[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_SCALAR,comm);CHKERRQ(ierr);
1155             if (imode == ADD_VALUES) {
1156               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1157                 array_to[pcbddc->replicated_local_primal_indices[i]]+=pcbddc->replicated_local_primal_values[i];
1158               }
1159             } else {
1160               for (i=0;i<pcbddc->replicated_primal_size;i++) {
1161                 array_to[pcbddc->replicated_local_primal_indices[i]]=pcbddc->replicated_local_primal_values[i];
1162               }
1163             }
1164           } else { /* no communications needed for SCATTER_REVERSE since needed data is already present */
1165             if (imode == ADD_VALUES) {
1166               for (i=0;i<pcbddc->local_primal_size;i++) {
1167                 array_to[i]+=array_from[pcbddc->local_primal_indices[i]];
1168               }
1169             } else {
1170               for (i=0;i<pcbddc->local_primal_size;i++) {
1171                 array_to[i]=array_from[pcbddc->local_primal_indices[i]];
1172               }
1173             }
1174           }
1175           break;
1176         case MULTILEVEL_BDDC:
1177           break;
1178         case PARALLEL_BDDC:
1179           break;
1180       }
1181       if (vec_from) {
1182         ierr = VecRestoreArray(vec_from,&array_from);CHKERRQ(ierr);
1183       }
1184       if (vec_to) {
1185         ierr = VecRestoreArray(vec_to,&array_to);CHKERRQ(ierr);
1186       }
1187       break;
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 /* uncomment for testing purposes */
1193 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1194 #undef __FUNCT__
1195 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1196 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1197 {
1198   PetscErrorCode    ierr;
1199   PC_IS*            pcis = (PC_IS*)(pc->data);
1200   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1201   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1202   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1203   MatType           impMatType=MATSEQAIJ;
1204   /* one and zero */
1205   PetscScalar       one=1.0,zero=0.0;
1206   /* space to store constraints and their local indices */
1207   PetscScalar       *temp_quadrature_constraint;
1208   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1209   /* iterators */
1210   PetscInt          i,j,k,total_counts,temp_start_ptr;
1211   /* stuff to store connected components stored in pcbddc->mat_graph */
1212   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1213   PetscInt          n_ISForFaces,n_ISForEdges;
1214   PetscBool         get_faces,get_edges,get_vertices;
1215   /* near null space stuff */
1216   MatNullSpace      nearnullsp;
1217   const Vec         *nearnullvecs;
1218   Vec               *localnearnullsp;
1219   PetscBool         nnsp_has_cnst;
1220   PetscInt          nnsp_size;
1221   PetscScalar       *array;
1222   /* BLAS integers */
1223   PetscBLASInt      lwork,lierr;
1224   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1225   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1226   /* LAPACK working arrays for SVD or POD */
1227   PetscBool         skip_lapack;
1228   PetscScalar       *work;
1229   PetscReal         *singular_vals;
1230 #if defined(PETSC_USE_COMPLEX)
1231   PetscReal         *rwork;
1232 #endif
1233 #if defined(PETSC_MISSING_LAPACK_GESVD)
1234   PetscBLASInt      Blas_one_2=1;
1235   PetscScalar       *temp_basis,*correlation_mat;
1236 #else
1237   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1238   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1239 #endif
1240   /* change of basis */
1241   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1242   PetscBool         boolforchange;
1243   PetscBT           touched,change_basis;
1244   /* auxiliary stuff */
1245   PetscInt          *nnz,*is_indices,*local_to_B;
1246   /* some quantities */
1247   PetscInt          n_vertices,total_primal_vertices;
1248   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1249 
1250 
1251   PetscFunctionBegin;
1252   /* Get index sets for faces, edges and vertices from graph */
1253   get_faces = PETSC_TRUE;
1254   get_edges = PETSC_TRUE;
1255   get_vertices = PETSC_TRUE;
1256   if (pcbddc->vertices_flag) {
1257     get_faces = PETSC_FALSE;
1258     get_edges = PETSC_FALSE;
1259   }
1260   if (pcbddc->constraints_flag) {
1261     get_vertices = PETSC_FALSE;
1262   }
1263   if (pcbddc->faces_flag) {
1264     get_edges = PETSC_FALSE;
1265   }
1266   if (pcbddc->edges_flag) {
1267     get_faces = PETSC_FALSE;
1268   }
1269   /* default */
1270   if (!get_faces && !get_edges && !get_vertices) {
1271     get_vertices = PETSC_TRUE;
1272   }
1273   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,get_faces,get_edges,get_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1274   /* print some info */
1275   if (pcbddc->dbg_flag) {
1276     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1277     i = 0;
1278     if (ISForVertices) {
1279       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1280     }
1281     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1282     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1283     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1284     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1285   }
1286   /* check if near null space is attached to global mat */
1287   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1288   if (nearnullsp) {
1289     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1290   } else { /* if near null space is not provided BDDC uses constants by default */
1291     nnsp_size = 0;
1292     nnsp_has_cnst = PETSC_TRUE;
1293   }
1294   /* get max number of constraints on a single cc */
1295   max_constraints = nnsp_size;
1296   if (nnsp_has_cnst) max_constraints++;
1297 
1298   /*
1299        Evaluate maximum storage size needed by the procedure
1300        - temp_indices will contain start index of each constraint stored as follows
1301        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1302        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1303        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1304                                                                                                                                                          */
1305   total_counts = n_ISForFaces+n_ISForEdges;
1306   total_counts *= max_constraints;
1307   n_vertices = 0;
1308   if (ISForVertices) {
1309     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1310   }
1311   total_counts += n_vertices;
1312   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1313   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1314   total_counts = 0;
1315   max_size_of_constraint = 0;
1316   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1317     if (i<n_ISForEdges) {
1318       used_IS = &ISForEdges[i];
1319     } else {
1320       used_IS = &ISForFaces[i-n_ISForEdges];
1321     }
1322     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1323     total_counts += j;
1324     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1325   }
1326   total_counts *= max_constraints;
1327   total_counts += n_vertices;
1328   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1329   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1330   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1331   /* local to boundary numbering */
1332   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1333   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1334   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1335   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1336   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1337   /* get local part of global near null space vectors */
1338   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1339   for (k=0;k<nnsp_size;k++) {
1340     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1341     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1342     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1343   }
1344 
1345   /* whether or not to skip lapack calls */
1346   skip_lapack = PETSC_TRUE;
1347   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1348 
1349   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1350   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1351     PetscScalar temp_work;
1352 #if defined(PETSC_MISSING_LAPACK_GESVD)
1353     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1354     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1355     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1356     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1357 #if defined(PETSC_USE_COMPLEX)
1358     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1359 #endif
1360     /* now we evaluate the optimal workspace using query with lwork=-1 */
1361     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1362     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1363     lwork = -1;
1364     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1365 #if !defined(PETSC_USE_COMPLEX)
1366     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1367 #else
1368     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1369 #endif
1370     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1371     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1372 #else /* on missing GESVD */
1373     /* SVD */
1374     PetscInt max_n,min_n;
1375     max_n = max_size_of_constraint;
1376     min_n = max_constraints;
1377     if (max_size_of_constraint < max_constraints) {
1378       min_n = max_size_of_constraint;
1379       max_n = max_constraints;
1380     }
1381     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1382 #if defined(PETSC_USE_COMPLEX)
1383     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1384 #endif
1385     /* now we evaluate the optimal workspace using query with lwork=-1 */
1386     lwork = -1;
1387     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1388     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1389     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1390     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1391 #if !defined(PETSC_USE_COMPLEX)
1392     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1393 #else
1394     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1395 #endif
1396     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1397     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1398 #endif /* on missing GESVD */
1399     /* Allocate optimal workspace */
1400     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1401     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1402   }
1403   /* Now we can loop on constraining sets */
1404   total_counts = 0;
1405   temp_indices[0] = 0;
1406   /* vertices */
1407   if (ISForVertices) {
1408     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1409     if (nnsp_has_cnst) { /* consider all vertices */
1410       for (i=0;i<n_vertices;i++) {
1411         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1412         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1413         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1414         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1415         total_counts++;
1416       }
1417     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1418       PetscBool used_vertex;
1419       for (i=0;i<n_vertices;i++) {
1420         used_vertex = PETSC_FALSE;
1421         k = 0;
1422         while (!used_vertex && k<nnsp_size) {
1423           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1424           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1425             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1426             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1427             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1428             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1429             total_counts++;
1430             used_vertex = PETSC_TRUE;
1431           }
1432           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1433           k++;
1434         }
1435       }
1436     }
1437     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1438     n_vertices = total_counts;
1439   }
1440 
1441   /* edges and faces */
1442   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1443     if (i<n_ISForEdges) {
1444       used_IS = &ISForEdges[i];
1445       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1446     } else {
1447       used_IS = &ISForFaces[i-n_ISForEdges];
1448       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1449     }
1450     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1451     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1452     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1453     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1454     /* change of basis should not be performed on local periodic nodes */
1455     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1456     if (nnsp_has_cnst) {
1457       PetscScalar quad_value;
1458       temp_constraints++;
1459       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1460       for (j=0;j<size_of_constraint;j++) {
1461         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1462         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1463         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1464       }
1465       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1466       if (boolforchange) {
1467         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1468       }
1469       total_counts++;
1470     }
1471     for (k=0;k<nnsp_size;k++) {
1472       PetscReal real_value;
1473       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1474       for (j=0;j<size_of_constraint;j++) {
1475         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1476         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1477         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1478       }
1479       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1480       /* check if array is null on the connected component */
1481       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1482       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1483       if (real_value > 0.0) { /* keep indices and values */
1484         temp_constraints++;
1485         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1486         if (boolforchange) {
1487           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1488         }
1489         total_counts++;
1490       }
1491     }
1492     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1493     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1494     if (!pcbddc->use_nnsp_true) {
1495       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1496 
1497 #if defined(PETSC_MISSING_LAPACK_GESVD)
1498       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1499          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1500          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1501             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1502             from that computed using LAPACKgesvd
1503          -> This is due to a different computation of eigenvectors in LAPACKheev
1504          -> The quality of the POD-computed basis will be the same */
1505       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1506       /* Store upper triangular part of correlation matrix */
1507       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1508       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1509       for (j=0;j<temp_constraints;j++) {
1510         for (k=0;k<j+1;k++) {
1511           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1512         }
1513       }
1514       /* compute eigenvalues and eigenvectors of correlation matrix */
1515       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1516       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1517 #if !defined(PETSC_USE_COMPLEX)
1518       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1519 #else
1520       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1521 #endif
1522       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1523       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1524       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1525       j=0;
1526       while (j < temp_constraints && singular_vals[j] < tol) j++;
1527       total_counts=total_counts-j;
1528       /* scale and copy POD basis into used quadrature memory */
1529       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1530       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1531       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1532       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1533       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1534       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1535       if (j<temp_constraints) {
1536         PetscInt ii;
1537         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1538         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1539         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1540         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1541         for (k=0;k<temp_constraints-j;k++) {
1542           for (ii=0;ii<size_of_constraint;ii++) {
1543             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1544           }
1545         }
1546       }
1547 #else  /* on missing GESVD */
1548       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1549       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1550       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1551       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1552 #if !defined(PETSC_USE_COMPLEX)
1553       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1554 #else
1555       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1556 #endif
1557       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1558       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1559       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1560       k = temp_constraints;
1561       if (k > size_of_constraint) k = size_of_constraint;
1562       j = 0;
1563       while (j < k && singular_vals[k-j-1] < tol) j++;
1564       total_counts = total_counts-temp_constraints+k-j;
1565 #endif /* on missing GESVD */
1566     }
1567   }
1568   /* free index sets of faces, edges and vertices */
1569   for (i=0;i<n_ISForFaces;i++) {
1570     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1571   }
1572   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1573   for (i=0;i<n_ISForEdges;i++) {
1574     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1575   }
1576   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1577   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1578 
1579   /* free workspace */
1580   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1581     ierr = PetscFree(work);CHKERRQ(ierr);
1582 #if defined(PETSC_USE_COMPLEX)
1583     ierr = PetscFree(rwork);CHKERRQ(ierr);
1584 #endif
1585     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1586 #if defined(PETSC_MISSING_LAPACK_GESVD)
1587     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1588     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1589 #endif
1590   }
1591   for (k=0;k<nnsp_size;k++) {
1592     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1593   }
1594   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1595 
1596   /* set quantities in pcbddc data structure */
1597   /* n_vertices defines the number of subdomain corners in the primal space */
1598   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1599   pcbddc->local_primal_size = total_counts;
1600   pcbddc->n_vertices = n_vertices;
1601   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1602 
1603   /* Create constraint matrix */
1604   /* The constraint matrix is used to compute the l2g map of primal dofs */
1605   /* so we need to set it up properly either with or without change of basis */
1606   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1607   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1608   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1609   /* array to compute a local numbering of constraints : vertices first then constraints */
1610   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1611   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1612   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1613   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1614   /* auxiliary stuff for basis change */
1615   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1616   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1617 
1618   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1619   total_primal_vertices=0;
1620   for (i=0;i<pcbddc->local_primal_size;i++) {
1621     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1622     if (size_of_constraint == 1) {
1623       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1624       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1625       aux_primal_minloc[total_primal_vertices]=0;
1626       total_primal_vertices++;
1627     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1628       PetscInt min_loc,min_index;
1629       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1630       /* find first untouched local node */
1631       k = 0;
1632       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1633       min_index = global_indices[k];
1634       min_loc = k;
1635       /* search the minimum among global nodes already untouched on the cc */
1636       for (k=1;k<size_of_constraint;k++) {
1637         /* there can be more than one constraint on a single connected component */
1638         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1639           min_index = global_indices[k];
1640           min_loc = k;
1641         }
1642       }
1643       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1644       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1645       aux_primal_minloc[total_primal_vertices]=min_loc;
1646       total_primal_vertices++;
1647     }
1648   }
1649   /* free workspace */
1650   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1651   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1652   /* permute indices in order to have a sorted set of vertices */
1653   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1654 
1655   /* nonzero structure of constraint matrix */
1656   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1657   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1658   j=total_primal_vertices;
1659   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1660     if (!PetscBTLookup(change_basis,i)) {
1661       nnz[j]=temp_indices[i+1]-temp_indices[i];
1662       j++;
1663     }
1664   }
1665   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1666   ierr = PetscFree(nnz);CHKERRQ(ierr);
1667   /* set values in constraint matrix */
1668   for (i=0;i<total_primal_vertices;i++) {
1669     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1670   }
1671   total_counts = total_primal_vertices;
1672   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1673     if (!PetscBTLookup(change_basis,i)) {
1674       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1675       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1676       total_counts++;
1677     }
1678   }
1679   /* assembling */
1680   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   /*
1683   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1684   */
1685   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1686   if (pcbddc->use_change_of_basis) {
1687     PetscBool qr_needed = PETSC_FALSE;
1688     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1689     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1690     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1691     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1692     /* work arrays */
1693     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1694     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1695     /* nonzeros per row */
1696     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1697       if (PetscBTLookup(change_basis,i)) {
1698         qr_needed = PETSC_TRUE;
1699         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1700         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1701       }
1702     }
1703     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1704     ierr = PetscFree(nnz);CHKERRQ(ierr);
1705     /* Set initial identity in the matrix */
1706     for (i=0;i<pcis->n_B;i++) {
1707       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1708     }
1709 
1710     /* Now we loop on the constraints which need a change of basis */
1711     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1712     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1713     /* Change of basis matrix T computed via QR decomposition of constraints */
1714     if (qr_needed) {
1715       /* dual and primal dofs on a single cc */
1716       PetscInt     dual_dofs,primal_dofs;
1717       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1718       PetscInt     primal_counter;
1719       /* working stuff for GEQRF */
1720       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1721       PetscBLASInt lqr_work;
1722       /* working stuff for UNGQR */
1723       PetscScalar  *gqr_work,lgqr_work_t;
1724       PetscBLASInt lgqr_work;
1725       /* working stuff for TRTRS */
1726       PetscScalar  *trs_rhs;
1727       PetscBLASInt Blas_NRHS;
1728       /* pointers for values insertion into change of basis matrix */
1729       PetscInt     *start_rows,*start_cols;
1730       PetscScalar  *start_vals;
1731       /* working stuff for values insertion */
1732       PetscBT      is_primal;
1733 
1734       /* space to store Q */
1735       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1736       /* first we issue queries for optimal work */
1737       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1738       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1739       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1740       lqr_work = -1;
1741       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1742       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1743       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1744       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1745       lgqr_work = -1;
1746       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1747       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1748       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1749       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1750       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1751       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1752       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1753       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1754       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1755       /* array to store scaling factors for reflectors */
1756       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1757       /* array to store rhs and solution of triangular solver */
1758       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1759       /* array to store whether a node is primal or not */
1760       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1761       for (i=0;i<total_primal_vertices;i++) {
1762         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1763       }
1764 
1765       /* allocating workspace for check */
1766       if (pcbddc->dbg_flag) {
1767         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1768         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1769         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1770       }
1771 
1772       /* loop on constraints and see whether or not they need a change of basis */
1773       /* -> using implicit ordering contained in temp_indices data */
1774       total_counts = pcbddc->n_vertices;
1775       primal_counter = total_counts;
1776       while (total_counts<pcbddc->local_primal_size) {
1777         primal_dofs = 1;
1778         if (PetscBTLookup(change_basis,total_counts)) {
1779           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1780           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1781             primal_dofs++;
1782           }
1783           /* get constraint info */
1784           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1785           dual_dofs = size_of_constraint-primal_dofs;
1786 
1787           /* copy quadrature constraints for change of basis check */
1788           if (pcbddc->dbg_flag) {
1789             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1790             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1791           }
1792 
1793           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1794           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1795 
1796           /* compute QR decomposition of constraints */
1797           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1798           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1799           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1800           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1801           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1802           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1803           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1804 
1805           /* explictly compute R^-T */
1806           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1807           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1808           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1809           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1810           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1811           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1812           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1813           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1814           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1815           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1816 
1817           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1818           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1819           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1820           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1821           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1822           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1823           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1824           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1825           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1826 
1827           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1828              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1829              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1830           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1831           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1832           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1833           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1834           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1835           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1836           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1837           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1838           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1839           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1840 
1841           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1842           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1843           /* insert cols for primal dofs */
1844           for (j=0;j<primal_dofs;j++) {
1845             start_vals = &qr_basis[j*size_of_constraint];
1846             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1847             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1848           }
1849           /* insert cols for dual dofs */
1850           for (j=0,k=0;j<dual_dofs;k++) {
1851             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1852               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1853               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1854               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1855               j++;
1856             }
1857           }
1858 
1859           /* check change of basis */
1860           if (pcbddc->dbg_flag) {
1861             PetscInt   ii,jj;
1862             PetscBool valid_qr=PETSC_TRUE;
1863             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1864             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1865             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1866             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1867             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1868             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1869             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1870             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1871             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1872             for (jj=0;jj<size_of_constraint;jj++) {
1873               for (ii=0;ii<primal_dofs;ii++) {
1874                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1875                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1876               }
1877             }
1878             if (!valid_qr) {
1879               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1880               for (jj=0;jj<size_of_constraint;jj++) {
1881                 for (ii=0;ii<primal_dofs;ii++) {
1882                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1883                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1884                   }
1885                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1886                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1887                   }
1888                 }
1889               }
1890             } else {
1891               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1892             }
1893           }
1894           /* increment primal counter */
1895           primal_counter += primal_dofs;
1896         } else {
1897           if (pcbddc->dbg_flag) {
1898             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1899           }
1900         }
1901         /* increment constraint counter total_counts */
1902         total_counts += primal_dofs;
1903       }
1904       if (pcbddc->dbg_flag) {
1905         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1906         ierr = PetscFree(work);CHKERRQ(ierr);
1907       }
1908       /* free workspace */
1909       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1910       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1911       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1912       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1913       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1914       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1915     }
1916     /* assembling */
1917     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1918     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1919     /*
1920     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1921     */
1922   }
1923   /* free workspace */
1924   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1925   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1926   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1927   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1928   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1929   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1930   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1931   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1937 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1938 {
1939   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1940   PC_IS       *pcis = (PC_IS*)pc->data;
1941   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1942   PetscInt    bs,ierr,i,vertex_size;
1943   PetscViewer viewer=pcbddc->dbg_viewer;
1944 
1945   PetscFunctionBegin;
1946   /* Init local Graph struct */
1947   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1948 
1949   /* Check validity of the csr graph passed in by the user */
1950   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1951     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1952   }
1953   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1954   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1955     Mat mat_adj;
1956     const PetscInt *xadj,*adjncy;
1957     PetscBool flg_row=PETSC_TRUE;
1958 
1959     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1960     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1961     if (!flg_row) {
1962       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1963     }
1964     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1965     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1966     if (!flg_row) {
1967       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1968     }
1969     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1970   }
1971 
1972   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1973   vertex_size = 1;
1974   if (!pcbddc->n_ISForDofs) {
1975     IS *custom_ISForDofs;
1976 
1977     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1978     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1979     for (i=0;i<bs;i++) {
1980       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1981     }
1982     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1983     /* remove my references to IS objects */
1984     for (i=0;i<bs;i++) {
1985       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1986     }
1987     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1988   } else { /* mat block size as vertex size (used for elasticity) */
1989     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1990   }
1991 
1992   /* Setup of Graph */
1993   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1994 
1995   /* Graph's connected components analysis */
1996   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1997 
1998   /* print some info to stdout */
1999   if (pcbddc->dbg_flag) {
2000     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 #undef __FUNCT__
2006 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2007 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt *vertices_idx[])
2008 {
2009   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2010   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2011   PetscErrorCode ierr;
2012 
2013   PetscFunctionBegin;
2014   n = 0;
2015   vertices = 0;
2016   if (pcbddc->ConstraintMatrix) {
2017     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2018     for (i=0;i<local_primal_size;i++) {
2019       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2020       if (size_of_constraint == 1) n++;
2021       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2022     }
2023     if (vertices_idx) {
2024       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2025       n = 0;
2026       for (i=0;i<local_primal_size;i++) {
2027         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2028         if (size_of_constraint == 1) {
2029           vertices[n++]=row_cmat_indices[0];
2030         }
2031         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2032       }
2033     }
2034   }
2035   *n_vertices = n;
2036   if (vertices_idx) *vertices_idx = vertices;
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2042 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt *constraints_idx[])
2043 {
2044   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2045   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2046   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2047   PetscBT        touched;
2048   PetscErrorCode ierr;
2049 
2050   PetscFunctionBegin;
2051   n = 0;
2052   constraints_index = 0;
2053   if (pcbddc->ConstraintMatrix) {
2054     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2055     max_size_of_constraint = 0;
2056     for (i=0;i<local_primal_size;i++) {
2057       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2058       if (size_of_constraint > 1) {
2059         n++;
2060       }
2061       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2062       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2063     }
2064     if (constraints_idx) {
2065       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2066       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2067       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2068       n = 0;
2069       for (i=0;i<local_primal_size;i++) {
2070         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2071         if (size_of_constraint > 1) {
2072           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2073           /* find first untouched local node */
2074           j = 0;
2075           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2076           min_index = row_cmat_global_indices[j];
2077           min_loc = j;
2078           /* search the minimum among nodes not yet touched on the connected component
2079              since there can be more than one constraint on a single cc */
2080           for (j=1;j<size_of_constraint;j++) {
2081             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2082               min_index = row_cmat_global_indices[j];
2083               min_loc = j;
2084             }
2085           }
2086           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2087           constraints_index[n++] = row_cmat_indices[min_loc];
2088         }
2089         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2090       }
2091       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2092       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2093     }
2094   }
2095   *n_constraints = n;
2096   if (constraints_idx) *constraints_idx = constraints_index;
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 /* the next two functions has been adapted from pcis.c */
2101 #undef __FUNCT__
2102 #define __FUNCT__ "PCBDDCApplySchur"
2103 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2104 {
2105   PetscErrorCode ierr;
2106   PC_IS          *pcis = (PC_IS*)(pc->data);
2107 
2108   PetscFunctionBegin;
2109   if (!vec2_B) { vec2_B = v; }
2110   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2111   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2112   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2113   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2114   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2120 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2121 {
2122   PetscErrorCode ierr;
2123   PC_IS          *pcis = (PC_IS*)(pc->data);
2124 
2125   PetscFunctionBegin;
2126   if (!vec2_B) { vec2_B = v; }
2127   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2128   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2129   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2130   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2131   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "PCBDDCSubsetNumbering"
2137 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2138 {
2139   Vec            local_vec,global_vec;
2140   IS             seqis,paris;
2141   VecScatter     scatter_ctx;
2142   PetscScalar    *array;
2143   PetscInt       *temp_global_dofs;
2144   PetscScalar    globalsum;
2145   PetscInt       i,j,s;
2146   PetscInt       nlocals,first_index,old_index,max_local;
2147   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2148   PetscMPIInt    *dof_sizes,*dof_displs;
2149   PetscBool      first_found;
2150   PetscErrorCode ierr;
2151 
2152   PetscFunctionBegin;
2153   /* mpi buffers */
2154   MPI_Comm_size(comm,&size_prec_comm);
2155   MPI_Comm_rank(comm,&rank_prec_comm);
2156   j = ( !rank_prec_comm ? size_prec_comm : 0);
2157   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2158   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2159   /* get maximum size of subset */
2160   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2161   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2162   max_local = 0;
2163   if (n_local_dofs) {
2164     max_local = temp_global_dofs[0];
2165     for (i=1;i<n_local_dofs;i++) {
2166       if (max_local < temp_global_dofs[i] ) {
2167         max_local = temp_global_dofs[i];
2168       }
2169     }
2170   }
2171   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2172   max_global++;
2173   max_local = 0;
2174   if (n_local_dofs) {
2175     max_local = local_dofs[0];
2176     for (i=1;i<n_local_dofs;i++) {
2177       if (max_local < local_dofs[i] ) {
2178         max_local = local_dofs[i];
2179       }
2180     }
2181   }
2182   max_local++;
2183   /* allocate workspace */
2184   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2185   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2186   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2187   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2188   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2189   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2190   /* create scatter */
2191   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2192   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2193   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2194   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2195   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2196   /* init array */
2197   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2198   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2199   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2200   if (local_dofs_mult) {
2201     for (i=0;i<n_local_dofs;i++) {
2202       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2203     }
2204   } else {
2205     for (i=0;i<n_local_dofs;i++) {
2206       array[local_dofs[i]]=1.0;
2207     }
2208   }
2209   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2210   /* scatter into global vec and get total number of global dofs */
2211   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2212   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2213   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2214   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2215   /* Fill global_vec with cumulative function for global numbering */
2216   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2217   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2218   nlocals = 0;
2219   first_index = -1;
2220   first_found = PETSC_FALSE;
2221   for (i=0;i<s;i++) {
2222     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2223       first_found = PETSC_TRUE;
2224       first_index = i;
2225     }
2226     nlocals += (PetscInt)PetscRealPart(array[i]);
2227   }
2228   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2229   if (!rank_prec_comm) {
2230     dof_displs[0]=0;
2231     for (i=1;i<size_prec_comm;i++) {
2232       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2233     }
2234   }
2235   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2236   if (first_found) {
2237     array[first_index] += (PetscScalar)nlocals;
2238     old_index = first_index;
2239     for (i=first_index+1;i<s;i++) {
2240       if (PetscRealPart(array[i]) > 0.0) {
2241         array[i] += array[old_index];
2242         old_index = i;
2243       }
2244     }
2245   }
2246   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2247   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2248   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2249   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2250   /* get global ordering of local dofs */
2251   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2252   if (local_dofs_mult) {
2253     for (i=0;i<n_local_dofs;i++) {
2254       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2255     }
2256   } else {
2257     for (i=0;i<n_local_dofs;i++) {
2258       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2259     }
2260   }
2261   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2262   /* free workspace */
2263   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2264   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2265   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2266   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2267   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2268   /* return pointer to global ordering of local dofs */
2269   *global_numbering_subset = temp_global_dofs;
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2275 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2276 {
2277   PetscInt       i,j;
2278   PetscScalar    *alphas;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   /* this implements stabilized Gram-Schmidt */
2283   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2284   for (i=0;i<n;i++) {
2285     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2286     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2287     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2288   }
2289   ierr = PetscFree(alphas);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 /* BDDC requires metis 5.0.1 for multilevel */
2294 #if defined(PETSC_HAVE_METIS)
2295 #include "metis.h"
2296 #define MetisInt    idx_t
2297 #define MetisScalar real_t
2298 #endif
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2302 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2303 {
2304   Mat_IS    *matis    = (Mat_IS*)pc->pmat->data;
2305   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2306   PC_IS     *pcis     = (PC_IS*)pc->data;
2307   MPI_Comm  prec_comm;
2308   MPI_Comm  coarse_comm;
2309 
2310   PetscInt  coarse_size;
2311 
2312   MatNullSpace CoarseNullSpace;
2313 
2314   /* common to all choiches */
2315   PetscScalar *temp_coarse_mat_vals;
2316   PetscScalar *ins_coarse_mat_vals;
2317   PetscInt    *ins_local_primal_indices;
2318   PetscMPIInt *localsizes2,*localdispl2;
2319   PetscMPIInt size_prec_comm;
2320   PetscMPIInt rank_prec_comm;
2321   PetscMPIInt active_rank=MPI_PROC_NULL;
2322   PetscMPIInt master_proc=0;
2323   PetscInt    ins_local_primal_size;
2324   /* specific to MULTILEVEL_BDDC */
2325   PetscMPIInt *ranks_recv=0;
2326   PetscMPIInt count_recv=0;
2327   PetscMPIInt rank_coarse_proc_send_to=-1;
2328   PetscMPIInt coarse_color = MPI_UNDEFINED;
2329   ISLocalToGlobalMapping coarse_ISLG;
2330   /* some other variables */
2331   PetscErrorCode ierr;
2332   MatType coarse_mat_type;
2333   PCType  coarse_pc_type;
2334   KSPType coarse_ksp_type;
2335   PC pc_temp;
2336   PetscInt i,j,k;
2337   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2338   /* verbose output viewer */
2339   PetscViewer viewer=pcbddc->dbg_viewer;
2340   PetscInt    dbg_flag=pcbddc->dbg_flag;
2341 
2342   PetscInt      offset,offset2;
2343   PetscMPIInt   im_active,active_procs;
2344   PetscInt      *dnz,*onz;
2345 
2346   PetscBool     setsym,issym=PETSC_FALSE;
2347 
2348   PetscFunctionBegin;
2349   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2350   ins_local_primal_indices = 0;
2351   ins_coarse_mat_vals      = 0;
2352   localsizes2              = 0;
2353   localdispl2              = 0;
2354   temp_coarse_mat_vals     = 0;
2355   coarse_ISLG              = 0;
2356 
2357   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2358   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2359   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2360 
2361   /* Assign global numbering to coarse dofs */
2362   {
2363     PetscInt     *auxlocal_primal,*aux_idx;
2364     PetscMPIInt  mpi_local_primal_size;
2365     PetscScalar  coarsesum,*array;
2366 
2367     mpi_local_primal_size = (PetscMPIInt)pcbddc->local_primal_size;
2368 
2369     /* Construct needed data structures for message passing */
2370     j = 0;
2371     if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2372       j = size_prec_comm;
2373     }
2374     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_sizes);CHKERRQ(ierr);
2375     ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2376     /* Gather local_primal_size information for all processes  */
2377     if (pcbddc->coarse_problem_type == REPLICATED_BDDC || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2378       ierr = MPI_Allgather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2379     } else {
2380       ierr = MPI_Gather(&mpi_local_primal_size,1,MPIU_INT,&pcbddc->local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2381     }
2382     pcbddc->replicated_primal_size = 0;
2383     for (i=0; i<j; i++) {
2384       pcbddc->local_primal_displacements[i] = pcbddc->replicated_primal_size ;
2385       pcbddc->replicated_primal_size += pcbddc->local_primal_sizes[i];
2386     }
2387 
2388     /* This code fragment assumes that the number of local constraints per connected component
2389        is not greater than the number of nodes defined for the connected component
2390        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2391     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2392     ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2393     ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2394     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2395     ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2396     ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2397     ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2398     /* Compute number of coarse dofs */
2399     ierr = PCBDDCSubsetNumbering(prec_comm,matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&pcbddc->local_primal_indices);CHKERRQ(ierr);
2400 
2401     if (dbg_flag) {
2402       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2403       ierr = PetscViewerASCIIPrintf(viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2404       ierr = PetscViewerASCIIPrintf(viewer,"Check coarse indices\n");CHKERRQ(ierr);
2405       ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2406       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2407       for (i=0;i<pcbddc->local_primal_size;i++) array[auxlocal_primal[i]]=1.0;
2408       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2409       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2410       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2411       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2412       ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2413       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2414       ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2415       for (i=0;i<pcis->n;i++) {
2416         if (array[i] == 1.0) {
2417           ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&j);CHKERRQ(ierr);
2418           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d: WRONG COARSE INDEX %d (local %d)\n",PetscGlobalRank,j,i);CHKERRQ(ierr);
2419         }
2420       }
2421       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2422       for (i=0;i<pcis->n;i++) {
2423         if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2424       }
2425       ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2426       ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2427       ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2428       ierr = VecScatterEnd  (matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2429       ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2430       ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem SHOULD be %lf\n",coarsesum);CHKERRQ(ierr);
2431       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2432     }
2433     ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
2434   }
2435 
2436   if (dbg_flag) {
2437     ierr = PetscViewerASCIIPrintf(viewer,"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr);
2438     if (dbg_flag > 1) {
2439       ierr = PetscViewerASCIIPrintf(viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2440       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2441       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2442       for (i=0;i<pcbddc->local_primal_size;i++) {
2443         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"local_primal_indices[%d]=%d \n",i,pcbddc->local_primal_indices[i]);
2444       }
2445       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2446     }
2447   }
2448 
2449   im_active = 0;
2450   if (pcis->n) im_active = 1;
2451   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2452 
2453   /* adapt coarse problem type */
2454 #if defined(PETSC_HAVE_METIS)
2455   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2456     if (pcbddc->current_level < pcbddc->max_levels) {
2457       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2458         if (dbg_flag) {
2459           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2460          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2461         }
2462         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2463       }
2464     } else {
2465       if (dbg_flag) {
2466         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2467         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2468       }
2469       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2470     }
2471   }
2472 #else
2473   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2474 #endif
2475 
2476   switch(pcbddc->coarse_problem_type){
2477 
2478     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2479     {
2480 #if defined(PETSC_HAVE_METIS)
2481       /* we need additional variables */
2482       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2483       MetisInt    *metis_coarse_subdivision;
2484       MetisInt    options[METIS_NOPTIONS];
2485       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2486       PetscMPIInt procs_jumps_coarse_comm;
2487       PetscMPIInt *coarse_subdivision;
2488       PetscMPIInt *total_count_recv;
2489       PetscMPIInt *total_ranks_recv;
2490       PetscMPIInt *displacements_recv;
2491       PetscMPIInt *my_faces_connectivity;
2492       PetscMPIInt *petsc_faces_adjncy;
2493       MetisInt    *faces_adjncy;
2494       MetisInt    *faces_xadj;
2495       PetscMPIInt *number_of_faces;
2496       PetscMPIInt *faces_displacements;
2497       PetscInt    *array_int;
2498       PetscMPIInt my_faces=0;
2499       PetscMPIInt total_faces=0;
2500       PetscInt    ranks_stretching_ratio;
2501 
2502       /* define some quantities */
2503       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2504       coarse_mat_type = MATIS;
2505       coarse_pc_type  = PCBDDC;
2506       coarse_ksp_type = KSPRICHARDSON;
2507 
2508       /* details of coarse decomposition */
2509       n_subdomains = active_procs;
2510       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2511       ranks_stretching_ratio = size_prec_comm/active_procs;
2512       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2513 
2514 #if 0
2515       PetscMPIInt *old_ranks;
2516       PetscInt    *new_ranks,*jj,*ii;
2517       MatPartitioning mat_part;
2518       IS coarse_new_decomposition,is_numbering;
2519       PetscViewer viewer_test;
2520       MPI_Comm    test_coarse_comm;
2521       PetscMPIInt test_coarse_color;
2522       Mat         mat_adj;
2523       /* Create new communicator for coarse problem splitting the old one */
2524       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2525          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2526       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2527       test_coarse_comm = MPI_COMM_NULL;
2528       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2529       if (im_active) {
2530         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2531         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2532         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2533         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2534         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2535         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2536         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2537         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2538         k = pcis->n_neigh-1;
2539         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2540         ii[0]=0;
2541         ii[1]=k;
2542         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2543         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2544         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2545         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2546         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2547         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2548         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2549         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2550         printf("Setting Nparts %d\n",n_parts);
2551         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2552         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2553         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2554         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2555         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2556         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2557         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2558         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2559         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2560         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2561         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2562         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2563         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2564       }
2565 #endif
2566 
2567       /* build CSR graph of subdomains' connectivity */
2568       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2569       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2570       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2571         for (j=0;j<pcis->n_shared[i];j++){
2572           array_int[ pcis->shared[i][j] ]+=1;
2573         }
2574       }
2575       for (i=1;i<pcis->n_neigh;i++){
2576         for (j=0;j<pcis->n_shared[i];j++){
2577           if (array_int[ pcis->shared[i][j] ] > 0 ){
2578             my_faces++;
2579             break;
2580           }
2581         }
2582       }
2583 
2584       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2585       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2586       my_faces=0;
2587       for (i=1;i<pcis->n_neigh;i++){
2588         for (j=0;j<pcis->n_shared[i];j++){
2589           if (array_int[ pcis->shared[i][j] ] > 0 ){
2590             my_faces_connectivity[my_faces]=pcis->neigh[i];
2591             my_faces++;
2592             break;
2593           }
2594         }
2595       }
2596       if (rank_prec_comm == master_proc) {
2597         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2598         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2599         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2600         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2601         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2602       }
2603       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2604       if (rank_prec_comm == master_proc) {
2605         faces_xadj[0]=0;
2606         faces_displacements[0]=0;
2607         j=0;
2608         for (i=1;i<size_prec_comm+1;i++) {
2609           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2610           if (number_of_faces[i-1]) {
2611             j++;
2612             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2613           }
2614         }
2615       }
2616       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2617       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2618       ierr = PetscFree(array_int);CHKERRQ(ierr);
2619       if (rank_prec_comm == master_proc) {
2620         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2621         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2622         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2623         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2624       }
2625 
2626       if ( rank_prec_comm == master_proc ) {
2627 
2628         PetscInt heuristic_for_metis=3;
2629 
2630         ncon=1;
2631         faces_nvtxs=n_subdomains;
2632         /* partition graoh induced by face connectivity */
2633         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2634         ierr = METIS_SetDefaultOptions(options);
2635         /* we need a contiguous partition of the coarse mesh */
2636         options[METIS_OPTION_CONTIG]=1;
2637         options[METIS_OPTION_NITER]=30;
2638         if (pcbddc->coarsening_ratio > 1) {
2639           if (n_subdomains>n_parts*heuristic_for_metis) {
2640             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2641             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2642             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2643             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2644           } else {
2645             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2646             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2647           }
2648         } else {
2649           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2650         }
2651         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2652         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2653         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2654 
2655         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2656         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2657         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2658         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2659       }
2660 
2661       /* Create new communicator for coarse problem splitting the old one */
2662       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2663         coarse_color=0;              /* for communicator splitting */
2664         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2665       }
2666       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2667          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2668       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2669 
2670       if ( coarse_color == 0 ) {
2671         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2672         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2673       } else {
2674         rank_coarse_comm = MPI_PROC_NULL;
2675       }
2676 
2677       /* master proc take care of arranging and distributing coarse information */
2678       if (rank_coarse_comm == master_proc) {
2679         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2680         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2681         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2682         /* some initializations */
2683         displacements_recv[0]=0;
2684         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2685         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2686         for (j=0;j<size_coarse_comm;j++) {
2687           for (i=0;i<size_prec_comm;i++) {
2688           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2689           }
2690         }
2691         /* displacements needed for scatterv of total_ranks_recv */
2692       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2693 
2694         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2695         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2696         for (j=0;j<size_coarse_comm;j++) {
2697           for (i=0;i<size_prec_comm;i++) {
2698             if (coarse_subdivision[i]==j) {
2699               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2700               total_count_recv[j]+=1;
2701             }
2702           }
2703         }
2704         /*for (j=0;j<size_coarse_comm;j++) {
2705           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2706           for (i=0;i<total_count_recv[j];i++) {
2707             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2708           }
2709           printf("\n");
2710         }*/
2711 
2712         /* identify new decomposition in terms of ranks in the old communicator */
2713         for (i=0;i<n_subdomains;i++) {
2714           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2715         }
2716         /*printf("coarse_subdivision in old end new ranks\n");
2717         for (i=0;i<size_prec_comm;i++)
2718           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2719             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2720           } else {
2721             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2722           }
2723         printf("\n");*/
2724       }
2725 
2726       /* Scatter new decomposition for send details */
2727       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2728       /* Scatter receiving details to members of coarse decomposition */
2729       if ( coarse_color == 0) {
2730         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2731         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2732         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2733       }
2734 
2735       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2736       if (coarse_color == 0) {
2737         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2738         for (i=0;i<count_recv;i++)
2739           printf("%d ",ranks_recv[i]);
2740         printf("\n");
2741       }*/
2742 
2743       if (rank_prec_comm == master_proc) {
2744         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2745         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2746         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2747         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2748       }
2749 #endif
2750       break;
2751     }
2752 
2753     case(REPLICATED_BDDC):
2754 
2755       pcbddc->coarse_communications_type = GATHERS_BDDC;
2756       coarse_mat_type = MATSEQAIJ;
2757       coarse_pc_type  = PCLU;
2758       coarse_ksp_type  = KSPPREONLY;
2759       coarse_comm = PETSC_COMM_SELF;
2760       active_rank = rank_prec_comm;
2761       break;
2762 
2763     case(PARALLEL_BDDC):
2764 
2765       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2766       coarse_mat_type = MATAIJ;
2767       coarse_pc_type  = PCREDUNDANT;
2768       coarse_ksp_type  = KSPPREONLY;
2769       coarse_comm = prec_comm;
2770       active_rank = rank_prec_comm;
2771       break;
2772 
2773     case(SEQUENTIAL_BDDC):
2774       pcbddc->coarse_communications_type = GATHERS_BDDC;
2775       coarse_mat_type = MATAIJ;
2776       coarse_pc_type = PCLU;
2777       coarse_ksp_type  = KSPPREONLY;
2778       coarse_comm = PETSC_COMM_SELF;
2779       active_rank = master_proc;
2780       break;
2781   }
2782 
2783   switch(pcbddc->coarse_communications_type){
2784 
2785     case(SCATTERS_BDDC):
2786       {
2787         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2788 
2789           IS coarse_IS;
2790 
2791           if(pcbddc->coarsening_ratio == 1) {
2792             ins_local_primal_size = pcbddc->local_primal_size;
2793             ins_local_primal_indices = pcbddc->local_primal_indices;
2794             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2795             /* nonzeros */
2796             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2797             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2798             for (i=0;i<ins_local_primal_size;i++) {
2799               dnz[i] = ins_local_primal_size;
2800             }
2801           } else {
2802             PetscMPIInt send_size;
2803             PetscMPIInt *send_buffer;
2804             PetscInt    *aux_ins_indices;
2805             PetscInt    ii,jj;
2806             MPI_Request *requests;
2807 
2808             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2809             /* reusing pcbddc->local_primal_displacements and pcbddc->replicated_primal_size */
2810             ierr = PetscFree(pcbddc->local_primal_displacements);CHKERRQ(ierr);
2811             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&pcbddc->local_primal_displacements);CHKERRQ(ierr);
2812             pcbddc->replicated_primal_size = count_recv;
2813             j = 0;
2814             for (i=0;i<count_recv;i++) {
2815               pcbddc->local_primal_displacements[i] = j;
2816               j += pcbddc->local_primal_sizes[ranks_recv[i]];
2817             }
2818             pcbddc->local_primal_displacements[count_recv] = j;
2819             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2820             /* allocate auxiliary space */
2821             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2822             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2823             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2824             /* allocate stuffs for message massing */
2825             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2826             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2827             /* send indices to be inserted */
2828             for (i=0;i<count_recv;i++) {
2829               send_size = pcbddc->local_primal_sizes[ranks_recv[i]];
2830               ierr = MPI_Irecv(&pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2831             }
2832             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2833               send_size = pcbddc->local_primal_size;
2834               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2835               for (i=0;i<send_size;i++) {
2836                 send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2837               }
2838               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2839             }
2840             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2841             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2842               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2843             }
2844             j = 0;
2845             for (i=0;i<count_recv;i++) {
2846               ii = pcbddc->local_primal_displacements[i+1]-pcbddc->local_primal_displacements[i];
2847               localsizes2[i] = ii*ii;
2848               localdispl2[i] = j;
2849               j += localsizes2[i];
2850               jj = pcbddc->local_primal_displacements[i];
2851               /* it counts the coarse subdomains sharing the coarse node */
2852               for (k=0;k<ii;k++) {
2853                 aux_ins_indices[pcbddc->replicated_local_primal_indices[jj+k]] += 1;
2854               }
2855             }
2856             /* temp_coarse_mat_vals used to store matrix values to be received */
2857             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2858             /* evaluate how many values I will insert in coarse mat */
2859             ins_local_primal_size = 0;
2860             for (i=0;i<coarse_size;i++) {
2861               if (aux_ins_indices[i]) {
2862                 ins_local_primal_size++;
2863               }
2864             }
2865             /* evaluate indices I will insert in coarse mat */
2866             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2867             j = 0;
2868             for(i=0;i<coarse_size;i++) {
2869               if(aux_ins_indices[i]) {
2870                 ins_local_primal_indices[j] = i;
2871                 j++;
2872               }
2873             }
2874             /* processes partecipating in coarse problem receive matrix data from their friends */
2875             for (i=0;i<count_recv;i++) {
2876               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2877             }
2878             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2879               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2880               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2881             }
2882             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2883             /* nonzeros */
2884             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2885             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2886             /* use aux_ins_indices to realize a global to local mapping */
2887             j=0;
2888             for(i=0;i<coarse_size;i++){
2889               if(aux_ins_indices[i]==0){
2890                 aux_ins_indices[i]=-1;
2891               } else {
2892                 aux_ins_indices[i]=j;
2893                 j++;
2894               }
2895             }
2896             for (i=0;i<count_recv;i++) {
2897               j = pcbddc->local_primal_sizes[ranks_recv[i]];
2898               for (k=0;k<j;k++) {
2899                 dnz[aux_ins_indices[pcbddc->replicated_local_primal_indices[pcbddc->local_primal_displacements[i]+k]]] += j;
2900               }
2901             }
2902             /* check */
2903             for (i=0;i<ins_local_primal_size;i++) {
2904               if (dnz[i] > ins_local_primal_size) {
2905                 dnz[i] = ins_local_primal_size;
2906               }
2907             }
2908             ierr = PetscFree(requests);CHKERRQ(ierr);
2909             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2910             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2911           }
2912           /* create local to global mapping needed by coarse MATIS */
2913           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2914           coarse_comm = prec_comm;
2915           active_rank = rank_prec_comm;
2916           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2917           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2918           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2919         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2920           /* arrays for values insertion */
2921           ins_local_primal_size = pcbddc->local_primal_size;
2922           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2923           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2924           for (j=0;j<ins_local_primal_size;j++){
2925             ins_local_primal_indices[j]=pcbddc->local_primal_indices[j];
2926             for (i=0;i<ins_local_primal_size;i++) {
2927               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2928             }
2929           }
2930         }
2931         break;
2932 
2933     }
2934 
2935     case(GATHERS_BDDC):
2936       {
2937 
2938         PetscMPIInt mysize,mysize2;
2939         PetscMPIInt *send_buffer;
2940 
2941         if (rank_prec_comm==active_rank) {
2942           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscMPIInt),&pcbddc->replicated_local_primal_indices);CHKERRQ(ierr);
2943           ierr = PetscMalloc ( pcbddc->replicated_primal_size*sizeof(PetscScalar),&pcbddc->replicated_local_primal_values);CHKERRQ(ierr);
2944           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2945           ierr = PetscMalloc ( size_prec_comm*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2946           /* arrays for values insertion */
2947       for (i=0;i<size_prec_comm;i++) localsizes2[i]=pcbddc->local_primal_sizes[i]*pcbddc->local_primal_sizes[i];
2948           localdispl2[0]=0;
2949       for (i=1;i<size_prec_comm;i++) localdispl2[i]=localsizes2[i-1]+localdispl2[i-1];
2950           j=0;
2951       for (i=0;i<size_prec_comm;i++) j+=localsizes2[i];
2952           ierr = PetscMalloc ( j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2953         }
2954 
2955         mysize=pcbddc->local_primal_size;
2956         mysize2=pcbddc->local_primal_size*pcbddc->local_primal_size;
2957         ierr = PetscMalloc(mysize*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2958     for (i=0; i<mysize; i++) send_buffer[i]=(PetscMPIInt)pcbddc->local_primal_indices[i];
2959 
2960         if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC){
2961           ierr = MPI_Gatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2962           ierr = MPI_Gatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,master_proc,prec_comm);CHKERRQ(ierr);
2963         } else {
2964           ierr = MPI_Allgatherv(send_buffer,mysize,MPIU_INT,&pcbddc->replicated_local_primal_indices[0],pcbddc->local_primal_sizes,pcbddc->local_primal_displacements,MPIU_INT,prec_comm);CHKERRQ(ierr);
2965           ierr = MPI_Allgatherv(&coarse_submat_vals[0],mysize2,MPIU_SCALAR,&temp_coarse_mat_vals[0],localsizes2,localdispl2,MPIU_SCALAR,prec_comm);CHKERRQ(ierr);
2966         }
2967         ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2968         break;
2969       }/* switch on coarse problem and communications associated with finished */
2970   }
2971 
2972   /* Now create and fill up coarse matrix */
2973   if ( rank_prec_comm == active_rank ) {
2974 
2975     Mat matis_coarse_local_mat;
2976 
2977     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2978       ierr = MatCreate(coarse_comm,&pcbddc->coarse_mat);CHKERRQ(ierr);
2979       ierr = MatSetSizes(pcbddc->coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2980       ierr = MatSetType(pcbddc->coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2981       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2982       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2983       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2984       ierr = MatSetOption(pcbddc->coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2985       ierr = MatSetOption(pcbddc->coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2986     } else {
2987       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&pcbddc->coarse_mat);CHKERRQ(ierr);
2988       ierr = MatSetUp(pcbddc->coarse_mat);CHKERRQ(ierr);
2989       ierr = MatISGetLocalMat(pcbddc->coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2990       ierr = MatSetOptionsPrefix(pcbddc->coarse_mat,"coarse_");CHKERRQ(ierr);
2991       ierr = MatSetFromOptions(pcbddc->coarse_mat);CHKERRQ(ierr);
2992       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2993       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2994       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2995     }
2996     /* preallocation */
2997     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2998 
2999       PetscInt lrows,lcols,bs;
3000 
3001       ierr = MatGetLocalSize(pcbddc->coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
3002       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
3003       ierr = MatGetBlockSize(pcbddc->coarse_mat,&bs);CHKERRQ(ierr);
3004 
3005       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3006 
3007         Vec         vec_dnz,vec_onz;
3008         PetscScalar *my_dnz,*my_onz,*array;
3009         PetscInt    *mat_ranges,*row_ownership;
3010         PetscInt    coarse_index_row,coarse_index_col,owner;
3011 
3012         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
3013         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
3014         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
3015         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
3016         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
3017 
3018         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
3019         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
3020         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3021         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
3022 
3023         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
3024         ierr = MatGetOwnershipRanges(pcbddc->coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
3025         for (i=0;i<size_prec_comm;i++) {
3026           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
3027             row_ownership[j]=i;
3028           }
3029         }
3030 
3031         for (i=0;i<pcbddc->local_primal_size;i++) {
3032           coarse_index_row = pcbddc->local_primal_indices[i];
3033           owner = row_ownership[coarse_index_row];
3034           for (j=i;j<pcbddc->local_primal_size;j++) {
3035             owner = row_ownership[coarse_index_row];
3036             coarse_index_col = pcbddc->local_primal_indices[j];
3037             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
3038               my_dnz[i] += 1.0;
3039             } else {
3040               my_onz[i] += 1.0;
3041             }
3042             if (i != j) {
3043               owner = row_ownership[coarse_index_col];
3044               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
3045                 my_dnz[j] += 1.0;
3046               } else {
3047                 my_onz[j] += 1.0;
3048               }
3049             }
3050           }
3051         }
3052         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
3053         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
3054         if (pcbddc->local_primal_size) {
3055           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
3056           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,pcbddc->local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
3057         }
3058         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
3059         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
3060         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
3061         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
3062         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
3063         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
3064         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
3065 
3066         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
3067         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
3068         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
3069 
3070         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
3071         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
3072         ierr = PetscFree(my_onz);CHKERRQ(ierr);
3073         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
3074         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
3075         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
3076       } else {
3077         for (k=0;k<size_prec_comm;k++){
3078           offset=pcbddc->local_primal_displacements[k];
3079           offset2=localdispl2[k];
3080           ins_local_primal_size = pcbddc->local_primal_sizes[k];
3081           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3082           for (j=0;j<ins_local_primal_size;j++){
3083             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3084           }
3085           for (j=0;j<ins_local_primal_size;j++) {
3086             ierr = MatPreallocateSet(ins_local_primal_indices[j],ins_local_primal_size,ins_local_primal_indices,dnz,onz);CHKERRQ(ierr);
3087           }
3088           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3089         }
3090       }
3091 
3092       /* check */
3093       for (i=0;i<lrows;i++) {
3094         if (dnz[i]>lcols) dnz[i]=lcols;
3095         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
3096       }
3097       ierr = MatSeqAIJSetPreallocation(pcbddc->coarse_mat,0,dnz);CHKERRQ(ierr);
3098       ierr = MatMPIAIJSetPreallocation(pcbddc->coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
3099       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3100     } else {
3101       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
3102       ierr = PetscFree(dnz);CHKERRQ(ierr);
3103     }
3104     /* insert values */
3105     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
3106       ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3107     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3108       if (pcbddc->coarsening_ratio == 1) {
3109         ins_coarse_mat_vals = coarse_submat_vals;
3110         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
3111       } else {
3112         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3113         for (k=0;k<pcbddc->replicated_primal_size;k++) {
3114           offset = pcbddc->local_primal_displacements[k];
3115           offset2 = localdispl2[k];
3116           ins_local_primal_size = pcbddc->local_primal_displacements[k+1]-pcbddc->local_primal_displacements[k];
3117           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3118           for (j=0;j<ins_local_primal_size;j++){
3119             ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3120           }
3121           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3122           ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3123           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3124         }
3125       }
3126       ins_local_primal_indices = 0;
3127       ins_coarse_mat_vals = 0;
3128     } else {
3129       for (k=0;k<size_prec_comm;k++){
3130         offset=pcbddc->local_primal_displacements[k];
3131         offset2=localdispl2[k];
3132         ins_local_primal_size = pcbddc->local_primal_sizes[k];
3133         ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
3134         for (j=0;j<ins_local_primal_size;j++){
3135           ins_local_primal_indices[j]=(PetscInt)pcbddc->replicated_local_primal_indices[offset+j];
3136         }
3137         ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
3138         ierr = MatSetValues(pcbddc->coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
3139         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
3140       }
3141       ins_local_primal_indices = 0;
3142       ins_coarse_mat_vals = 0;
3143     }
3144     ierr = MatAssemblyBegin(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3145     ierr = MatAssemblyEnd(pcbddc->coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3146     /* symmetry of coarse matrix */
3147     if (issym) {
3148       ierr = MatSetOption(pcbddc->coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3149     }
3150     ierr = MatGetVecs(pcbddc->coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3151   }
3152 
3153   /* create loc to glob scatters if needed */
3154   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
3155      IS local_IS,global_IS;
3156      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
3157      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
3158      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3159      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
3160      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
3161   }
3162 
3163   /* free memory no longer needed */
3164   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
3165   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
3166   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
3167   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
3168   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
3169   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
3170 
3171   /* Compute coarse null space */
3172   CoarseNullSpace = 0;
3173   if (pcbddc->NullSpace) {
3174     ierr = PCBDDCNullSpaceAssembleCoarse(pc,&CoarseNullSpace);CHKERRQ(ierr);
3175   }
3176 
3177   /* KSP for coarse problem */
3178   if (rank_prec_comm == active_rank) {
3179     PetscBool isbddc=PETSC_FALSE;
3180 
3181     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
3182     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3183     ierr = KSPSetOperators(pcbddc->coarse_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3184     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
3185     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3186     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3187     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3188     /* Allow user's customization */
3189     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
3190     /* Set Up PC for coarse problem BDDC */
3191     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3192       i = pcbddc->current_level+1;
3193       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
3194       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3195       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3196       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
3197       if (CoarseNullSpace) {
3198         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3199       }
3200       if (dbg_flag) {
3201         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
3202         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3203       }
3204     } else {
3205       if (CoarseNullSpace) {
3206         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3207       }
3208     }
3209     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3210     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3211 
3212     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
3213     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3214     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3215     if (j == 1) {
3216       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3217       if (isbddc) {
3218         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
3219       }
3220     }
3221   }
3222   /* Check coarse problem if requested */
3223   if ( dbg_flag && rank_prec_comm == active_rank ) {
3224     KSP check_ksp;
3225     PC  check_pc;
3226     Vec check_vec;
3227     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
3228     KSPType check_ksp_type;
3229 
3230     /* Create ksp object suitable for extreme eigenvalues' estimation */
3231     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
3232     ierr = KSPSetOperators(check_ksp,pcbddc->coarse_mat,pcbddc->coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3233     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3234     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3235       if (issym) check_ksp_type = KSPCG;
3236       else check_ksp_type = KSPGMRES;
3237       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3238     } else {
3239       check_ksp_type = KSPPREONLY;
3240     }
3241     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3242     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3243     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3244     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3245     /* create random vec */
3246     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3247     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3248     if (CoarseNullSpace) {
3249       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3250     }
3251     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3252     /* solve coarse problem */
3253     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3254     if (CoarseNullSpace) {
3255       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3256     }
3257     /* check coarse problem residual error */
3258     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3259     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3260     ierr = MatMult(pcbddc->coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3261     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3262     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3263     /* get eigenvalue estimation if inexact */
3264     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3265       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3266       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3267       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3268       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3269     }
3270     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3271     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3272     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3273   }
3274   if (dbg_flag) {
3275     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3276   }
3277   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3278 
3279   PetscFunctionReturn(0);
3280 }
3281 
3282