xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision f34684f1ec6d42f1be6f8f2de4992eb43c62fbb1)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCSetLevel"
47 PetscErrorCode PCBDDCSetLevel(PC pc,PetscInt level)
48 {
49   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
50 
51   PetscFunctionBegin;
52   pcbddc->current_level=level;
53   PetscFunctionReturn(0);
54 }
55 
56 #undef __FUNCT__
57 #define __FUNCT__ "PCBDDCResetCustomization"
58 PetscErrorCode PCBDDCResetCustomization(PC pc)
59 {
60   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
61   PetscInt       i;
62   PetscErrorCode ierr;
63 
64   PetscFunctionBegin;
65   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
66   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
67   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
68   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
69   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
70   for (i=0;i<pcbddc->n_ISForDofs;i++) {
71     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
72   }
73   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "PCBDDCResetTopography"
79 PetscErrorCode PCBDDCResetTopography(PC pc)
80 {
81   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
82   PetscErrorCode ierr;
83 
84   PetscFunctionBegin;
85   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
86   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
87   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "PCBDDCResetSolvers"
93 PetscErrorCode PCBDDCResetSolvers(PC pc)
94 {
95   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
96   PetscErrorCode ierr;
97 
98   PetscFunctionBegin;
99   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
101   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
102   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
103   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
104   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
105   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
106   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
107   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
108   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
109   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
110   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
111   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
112   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
113   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
114   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
115   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
116   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 #undef __FUNCT__
121 #define __FUNCT__ "PCBDDCCreateWorkVectors"
122 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
123 {
124   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
125   PC_IS          *pcis = (PC_IS*)pc->data;
126   VecType        impVecType;
127   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
128   PetscErrorCode ierr;
129 
130   PetscFunctionBegin;
131   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
132   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
133   local_primal_size = n_constraints+n_vertices;
134   n_R = pcis->n-n_vertices;
135   /* local work vectors */
136   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
137   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
138   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
139   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
140   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
141   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
142   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
143   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
144   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
145   if (n_constraints) {
146     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
147     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
148     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
155 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
156 {
157   PetscErrorCode         ierr;
158   /* pointers to pcis and pcbddc */
159   PC_IS*                 pcis = (PC_IS*)pc->data;
160   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
161   /* submatrices of local problem */
162   Mat                    A_RV,A_VR,A_VV;
163   /* working matrices */
164   Mat                    M1,M2,M3,C_CR;
165   /* working vectors */
166   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
167   /* additional working stuff */
168   IS                     is_aux;
169   ISLocalToGlobalMapping BtoNmap;
170   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
171   const PetscScalar      *array,*row_cmat_values;
172   const PetscInt         *row_cmat_indices,*idx_R_local;
173   PetscInt               *vertices,*idx_V_B,*auxindices;
174   PetscInt               n_vertices,n_constraints,size_of_constraint;
175   PetscInt               i,j,n_R,n_D,n_B;
176   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
177   /* Vector and matrix types */
178   VecType                impVecType;
179   MatType                impMatType;
180   /* some shortcuts to scalars */
181   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
182   /* for debugging purposes */
183   PetscReal              *coarsefunctions_errors,*constraints_errors;
184 
185   PetscFunctionBegin;
186   /* get number of vertices and their local indices */
187   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
188   n_constraints = pcbddc->local_primal_size-n_vertices;
189   /* Set Non-overlapping dimensions */
190   n_B = pcis->n_B; n_D = pcis->n - n_B;
191   n_R = pcis->n-n_vertices;
192 
193   /* Set types for local objects needed by BDDC precondtioner */
194   impMatType = MATSEQDENSE;
195   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
196 
197   /* Allocating some extra storage just to be safe */
198   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
199   for (i=0;i<pcis->n;i++) auxindices[i]=i;
200 
201   /* vertices in boundary numbering */
202   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
203   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
204   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
205   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
206   if (i != n_vertices) {
207     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
208   }
209 
210   /* some work vectors on vertices and/or constraints */
211   if (n_vertices) {
212     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
213     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
214     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
215     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
216   }
217   if (n_constraints) {
218     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
219     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
220   }
221 
222   /* Precompute stuffs needed for preprocessing and application of BDDC*/
223   if (n_constraints) {
224     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
225     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
226     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
227     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
228 
229     /* Extract constraints on R nodes: C_{CR}  */
230     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
231     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
232     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
233 
234     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
235     for (i=0;i<n_constraints;i++) {
236       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
237       /* Get row of constraint matrix in R numbering */
238       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
239       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
240       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
241       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
242       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
243       /* Solve for row of constraint matrix in R numbering */
244       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
245       /* Set values in local_auxmat2 */
246       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
247       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
248       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
249     }
250     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
251     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
252     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
253 
254     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
255     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
256     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
257     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
258     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
259     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
260     ierr = MatSetUp(M1);CHKERRQ(ierr);
261     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
262     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
263     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
264     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
265     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
266     ierr = MatDestroy(&M2);CHKERRQ(ierr);
267     ierr = MatDestroy(&M3);CHKERRQ(ierr);
268     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
269     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
270   }
271 
272   /* Get submatrices from subdomain matrix */
273   if (n_vertices) {
274     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
275     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
276     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
277     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
278     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
279   }
280 
281   /* Matrix of coarse basis functions (local) */
282   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
283   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
284   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
285   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
286   if (pcbddc->switch_static || pcbddc->dbg_flag) {
287     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
288     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
289     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
290     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
291   }
292 
293   if (pcbddc->dbg_flag) {
294     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
295     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
296     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
297   }
298   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
299   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
300 
301   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
302 
303   /* vertices */
304   for (i=0;i<n_vertices;i++) {
305     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
306     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
307     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
308     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
309     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
310     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
311     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
312     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
313     if (n_constraints) {
314       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
315       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
316       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
317     }
318     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
319     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
320 
321     /* Set values in coarse basis function and subdomain part of coarse_mat */
322     /* coarse basis functions */
323     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
324     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
325     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
326     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
327     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
329     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
330     if (pcbddc->switch_static || pcbddc->dbg_flag) {
331       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
332       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
333       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
334       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
335       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
336     }
337     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
338     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
339     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
340     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
341     if (n_constraints) {
342       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
343       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
344       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
345     }
346 
347     /* check */
348     if (pcbddc->dbg_flag) {
349       /* assemble subdomain vector on local nodes */
350       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
351       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
352       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
353       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
354       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
355       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
357       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
358       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
359       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
360       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
361       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
362       if (n_constraints) {
363         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
364         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
365         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
366       }
367       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
368       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
369       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
370       /* check saddle point solution */
371       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
372       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
373       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
374       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
375       /* shift by the identity matrix */
376       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
377       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
378       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
379       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
380     }
381   }
382 
383   /* constraints */
384   for (i=0;i<n_constraints;i++) {
385     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
386     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
387     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
388     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
389     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
390     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
391     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
392     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
393     if (n_vertices) {
394       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
395     }
396     /* Set values in coarse basis function and subdomain part of coarse_mat */
397     /* coarse basis functions */
398     j = i+n_vertices; /* don't touch this! */
399     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
400     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
403     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
404     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
405     if (pcbddc->switch_static || pcbddc->dbg_flag) {
406       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
407       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
408       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
409       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
410       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
411     }
412     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
413     if (n_vertices) {
414       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
415       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
416       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
417     }
418     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
419     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
420     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
421 
422     if (pcbddc->dbg_flag) {
423       /* assemble subdomain vector on nodes */
424       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
425       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
430       /* assemble subdomain vector of lagrange multipliers */
431       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
432       if (n_vertices) {
433         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
434         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
435         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
436       }
437       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
438       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
439       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
440       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
441       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
442       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
443       /* check saddle point solution */
444       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
445       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
446       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
447       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
448       /* shift by the identity matrix */
449       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
450       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
451       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
452       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
453     }
454   }
455   /* call assembling routines for local coarse basis */
456   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
457   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
458   if (pcbddc->switch_static || pcbddc->dbg_flag) {
459     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
460     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
461   }
462 
463   /* compute other basis functions for non-symmetric problems */
464   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
465   if (!setsym || (setsym && !issym)) {
466     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
467     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
468     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
469     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
470     if (pcbddc->switch_static || pcbddc->dbg_flag) {
471       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
472       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
473       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
474       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
475     }
476     for (i=0;i<pcbddc->local_primal_size;i++) {
477       if (n_constraints) {
478         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
479         for (j=0;j<n_constraints;j++) {
480           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
481         }
482         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
483         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
484       }
485       if (i<n_vertices) {
486         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
487         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
488         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
489         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
490         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
491         if (n_constraints) {
492           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
493         }
494       } else {
495         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
496       }
497       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
498       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
499       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
500       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
501       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
502       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
503       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
504       if (i<n_vertices) {
505         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
506       }
507       if (pcbddc->switch_static || pcbddc->dbg_flag) {
508         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
509         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
510         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
511         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
512         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
513       }
514 
515       if (pcbddc->dbg_flag) {
516         /* assemble subdomain vector on nodes */
517         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
518         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
519         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
520         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
521         if (i<n_vertices) {
522           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
523         }
524         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
525         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
526         /* assemble subdomain vector of lagrange multipliers */
527         for (j=0;j<pcbddc->local_primal_size;j++) {
528           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
529         }
530         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
531         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
532         /* check saddle point solution */
533         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
534         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
535         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
536         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
537         /* shift by the identity matrix */
538         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
539         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
540         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
541         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
542       }
543     }
544     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
545     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
546     if (pcbddc->switch_static || pcbddc->dbg_flag) {
547       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
548       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
549     }
550   }
551   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
552   /* Checking coarse_sub_mat and coarse basis functios */
553   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
554   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
555   if (pcbddc->dbg_flag) {
556     Mat         coarse_sub_mat;
557     Mat         AUXMAT,TM1,TM2,TM3,TM4;
558     Mat         coarse_phi_D,coarse_phi_B;
559     Mat         coarse_psi_D,coarse_psi_B;
560     Mat         A_II,A_BB,A_IB,A_BI;
561     MatType     checkmattype=MATSEQAIJ;
562     PetscReal   real_value;
563 
564     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
565     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
566     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
567     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
568     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
569     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
570     if (pcbddc->coarse_psi_B) {
571       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
572       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
573     }
574     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
575 
576     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
577     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
578     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
579     if (pcbddc->coarse_psi_B) {
580       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
581       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
583       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
590       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
591       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
592     } else {
593       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
594       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
595       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
596       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
597       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
598       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
599       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
600       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
601     }
602     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
603     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
604     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
605     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
606     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
607     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
608     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
609     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
610     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
611     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
612     for (i=0;i<pcbddc->local_primal_size;i++) {
613       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
614     }
615     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
616     for (i=0;i<pcbddc->local_primal_size;i++) {
617       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
618     }
619     if (pcbddc->coarse_psi_B) {
620       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
621       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
622         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
623       }
624       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
625       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
626         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
627       }
628     }
629     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
630     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
631     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
632     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
633     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
634     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
635     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
636     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
637     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
638     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
639     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
640     if (pcbddc->coarse_psi_B) {
641       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
642       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
643     }
644     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
645     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
646     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
647     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
648   }
649   /* free memory */
650   if (n_vertices) {
651     ierr = PetscFree(vertices);CHKERRQ(ierr);
652     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
653     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
654     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
655     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
656     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
657   }
658   if (n_constraints) {
659     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
660     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
661     ierr = MatDestroy(&M1);CHKERRQ(ierr);
662     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
663   }
664   ierr = PetscFree(auxindices);CHKERRQ(ierr);
665   /* get back data */
666   *coarse_submat_vals_n = coarse_submat_vals;
667   PetscFunctionReturn(0);
668 }
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
672 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
673 {
674   PC_IS*            pcis = (PC_IS*)(pc->data);
675   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
676   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
677   /* manage repeated solves */
678   MatReuse          reuse;
679   MatStructure      matstruct;
680   PetscErrorCode    ierr;
681 
682   PetscFunctionBegin;
683   /* get mat flags */
684   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
685   reuse = MAT_INITIAL_MATRIX;
686   if (pc->setupcalled) {
687     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
688     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
689     if (matstruct == SAME_NONZERO_PATTERN) {
690       reuse = MAT_REUSE_MATRIX;
691     } else {
692       reuse = MAT_INITIAL_MATRIX;
693     }
694   }
695   if (reuse == MAT_INITIAL_MATRIX) {
696     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
697     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
698     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
699     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
700     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
701   }
702 
703   /* transform local matrices if needed */
704   if (pcbddc->use_change_of_basis) {
705     Mat         change_mat_all;
706     PetscScalar *row_cmat_values;
707     PetscInt    *row_cmat_indices;
708     PetscInt    *nnz,*is_indices,*temp_indices;
709     PetscInt    i,j,k,n_D,n_B;
710 
711     /* Get Non-overlapping dimensions */
712     n_B = pcis->n_B;
713     n_D = pcis->n-n_B;
714 
715     /* compute nonzero structure of change of basis on all local nodes */
716     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
717     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
718     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
719     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
720     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
721     k=1;
722     for (i=0;i<n_B;i++) {
723       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
724       nnz[is_indices[i]]=j;
725       if (k < j) k = j;
726       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
727     }
728     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     /* assemble change of basis matrix on the whole set of local dofs */
730     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
731     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
732     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
733     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
734     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
735     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
736     for (i=0;i<n_D;i++) {
737       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
738     }
739     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
740     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
741     for (i=0;i<n_B;i++) {
742       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
743       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
744       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
745       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
746     }
747     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
748     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
749     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
750     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
751     if (i==1) {
752       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
753     } else {
754       Mat work_mat;
755       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
756       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
757       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
758     }
759     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
760     ierr = PetscFree(nnz);CHKERRQ(ierr);
761     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
762   } else {
763     /* without change of basis, the local matrix is unchanged */
764     if (!pcbddc->local_mat) {
765       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
766       pcbddc->local_mat = matis->A;
767     }
768   }
769 
770   /* get submatrices */
771   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
772   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
773   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
774   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
775   PetscFunctionReturn(0);
776 }
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
780 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
781 {
782   PC_IS*         pcis = (PC_IS*)(pc->data);
783   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
784   IS             is_aux1,is_aux2;
785   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
786   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
787   PetscBT        bitmask;
788   PetscErrorCode ierr;
789 
790   PetscFunctionBegin;
791   /* Set Non-overlapping dimensions */
792   n_B = pcis->n_B; n_D = pcis->n - n_B;
793   /* get vertex indices from constraint matrix */
794   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
795   /* Set number of constraints */
796   n_constraints = pcbddc->local_primal_size-n_vertices;
797   /* create auxiliary bitmask */
798   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
799   for (i=0;i<n_vertices;i++) {
800     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
801   }
802   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
803   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
804   for (i=0, n_R=0; i<pcis->n; i++) {
805     if (!PetscBTLookup(bitmask,i)) {
806       idx_R_local[n_R] = i;
807       n_R++;
808     }
809   }
810   ierr = PetscFree(vertices);CHKERRQ(ierr);
811   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
812 
813   /* print some info if requested */
814   if (pcbddc->dbg_flag) {
815     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
816     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
817     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
818     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
819     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
820     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
821     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
822   }
823 
824   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
825   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
826   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
827   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
828   for (i=0; i<n_D; i++) {
829     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
830   }
831   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
832   for (i=0, j=0; i<n_R; i++) {
833     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
834       aux_array1[j++] = i;
835     }
836   }
837   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
838   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
839   for (i=0, j=0; i<n_B; i++) {
840     if (!PetscBTLookup(bitmask,is_indices[i])) {
841       aux_array2[j++] = i;
842     }
843   }
844   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
845   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
846   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
847   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
849 
850   if (pcbddc->switch_static || pcbddc->dbg_flag) {
851     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
852     for (i=0, j=0; i<n_R; i++) {
853       if (PetscBTLookup(bitmask,idx_R_local[i])) {
854         aux_array1[j++] = i;
855       }
856     }
857     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
858     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
859     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
860   }
861   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
867 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
868 {
869   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
870 
871   PetscFunctionBegin;
872   pcbddc->use_exact_dirichlet=use;
873   PetscFunctionReturn(0);
874 }
875 
876 #undef __FUNCT__
877 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
878 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
879 {
880   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
881   PC_IS          *pcis = (PC_IS*)pc->data;
882   PC             pc_temp;
883   Mat            A_RR;
884   Vec            vec1,vec2,vec3;
885   MatStructure   matstruct;
886   PetscScalar    m_one = -1.0;
887   PetscReal      value;
888   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   /* Creating PC contexts for local Dirichlet and Neumann problems */
893   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
894 
895   /* DIRICHLET PROBLEM */
896   /* Matrix for Dirichlet problem is pcis->A_II */
897   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
898   if (!pcbddc->ksp_D) { /* create object if not yet build */
899     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
900     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
901     /* default */
902     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
903     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
904     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
905     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
906     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
907   }
908   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
909   /* Allow user's customization */
910   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
911   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
912   if (!n_D) {
913     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
914     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
915   }
916   /* Set Up KSP for Dirichlet problem of BDDC */
917   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
918   /* set ksp_D into pcis data */
919   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
920   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
921   pcis->ksp_D = pcbddc->ksp_D;
922 
923   /* NEUMANN PROBLEM */
924   /* Matrix for Neumann problem is A_RR -> we need to create it */
925   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
926   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
927   if (!pcbddc->ksp_R) { /* create object if not yet build */
928     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
929     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
930     /* default */
931     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
932     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
933     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
934     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
935     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
936   }
937   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
938   /* Allow user's customization */
939   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
940   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
941   if (!n_R) {
942     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
943     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
944   }
945   /* Set Up KSP for Neumann problem of BDDC */
946   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
947 
948   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
949 
950   /* Dirichlet */
951   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
952   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
953   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
954   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
955   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
956   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
957   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
958   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
959   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
960   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
961   /* need to be adapted? */
962   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
963   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
964   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
965   /* print info */
966   if (pcbddc->dbg_flag) {
967     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
968     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
969     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
970     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
971     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
972   }
973   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
974     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
975   }
976 
977   /* Neumann */
978   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
979   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
980   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
981   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
982   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
983   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
984   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
985   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
986   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
987   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
988   /* need to be adapted? */
989   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
990   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
991   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
992   /* print info */
993   if (pcbddc->dbg_flag) {
994     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
995     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
996   }
997   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
998     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
999   }
1000 
1001   /* free Neumann problem's matrix */
1002   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1008 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1009 {
1010   PetscErrorCode ierr;
1011   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1012 
1013   PetscFunctionBegin;
1014   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1015   if (pcbddc->local_auxmat1) {
1016     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1017     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1018   }
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1024 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1025 {
1026   PetscErrorCode ierr;
1027   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1028   PC_IS*            pcis = (PC_IS*)  (pc->data);
1029   const PetscScalar zero = 0.0;
1030 
1031   PetscFunctionBegin;
1032   /* Application of PHI^T (or PSI^T)  */
1033   if (pcbddc->coarse_psi_B) {
1034     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1035     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1036   } else {
1037     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1038     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1039   }
1040   /* Scatter data of coarse_rhs */
1041   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1042   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043 
1044   /* Local solution on R nodes */
1045   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1046   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1047   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1048   if (pcbddc->switch_static) {
1049     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   }
1052   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1053   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1054   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1056   if (pcbddc->switch_static) {
1057     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   }
1060 
1061   /* Coarse solution */
1062   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1064     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1065   }
1066   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068 
1069   /* Sum contributions from two levels */
1070   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1071   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1077 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   switch (pcbddc->coarse_communications_type) {
1084     case SCATTERS_BDDC:
1085       ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1086       break;
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1093 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1094 {
1095   PetscErrorCode ierr;
1096   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1097 
1098   PetscFunctionBegin;
1099   switch (pcbddc->coarse_communications_type) {
1100     case SCATTERS_BDDC:
1101       ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1102       break;
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 /* uncomment for testing purposes */
1108 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1109 #undef __FUNCT__
1110 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1111 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1112 {
1113   PetscErrorCode    ierr;
1114   PC_IS*            pcis = (PC_IS*)(pc->data);
1115   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1116   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1117   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1118   MatType           impMatType=MATSEQAIJ;
1119   /* one and zero */
1120   PetscScalar       one=1.0,zero=0.0;
1121   /* space to store constraints and their local indices */
1122   PetscScalar       *temp_quadrature_constraint;
1123   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1124   /* iterators */
1125   PetscInt          i,j,k,total_counts,temp_start_ptr;
1126   /* stuff to store connected components stored in pcbddc->mat_graph */
1127   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1128   PetscInt          n_ISForFaces,n_ISForEdges;
1129   /* near null space stuff */
1130   MatNullSpace      nearnullsp;
1131   const Vec         *nearnullvecs;
1132   Vec               *localnearnullsp;
1133   PetscBool         nnsp_has_cnst;
1134   PetscInt          nnsp_size;
1135   PetscScalar       *array;
1136   /* BLAS integers */
1137   PetscBLASInt      lwork,lierr;
1138   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1139   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1140   /* LAPACK working arrays for SVD or POD */
1141   PetscBool         skip_lapack;
1142   PetscScalar       *work;
1143   PetscReal         *singular_vals;
1144 #if defined(PETSC_USE_COMPLEX)
1145   PetscReal         *rwork;
1146 #endif
1147 #if defined(PETSC_MISSING_LAPACK_GESVD)
1148   PetscBLASInt      Blas_one_2=1;
1149   PetscScalar       *temp_basis,*correlation_mat;
1150 #else
1151   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1152   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1153 #endif
1154   /* change of basis */
1155   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1156   PetscBool         boolforchange;
1157   PetscBT           touched,change_basis;
1158   /* auxiliary stuff */
1159   PetscInt          *nnz,*is_indices,*local_to_B;
1160   /* some quantities */
1161   PetscInt          n_vertices,total_primal_vertices;
1162   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1163 
1164 
1165   PetscFunctionBegin;
1166   /* Get index sets for faces, edges and vertices from graph */
1167   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1168     pcbddc->use_vertices = PETSC_TRUE;
1169   }
1170   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1171   /* print some info */
1172   if (pcbddc->dbg_flag) {
1173     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1174     i = 0;
1175     if (ISForVertices) {
1176       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1177     }
1178     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1179     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1180     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182   }
1183   /* check if near null space is attached to global mat */
1184   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1185   if (nearnullsp) {
1186     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1187   } else { /* if near null space is not provided BDDC uses constants by default */
1188     nnsp_size = 0;
1189     nnsp_has_cnst = PETSC_TRUE;
1190   }
1191   /* get max number of constraints on a single cc */
1192   max_constraints = nnsp_size;
1193   if (nnsp_has_cnst) max_constraints++;
1194 
1195   /*
1196        Evaluate maximum storage size needed by the procedure
1197        - temp_indices will contain start index of each constraint stored as follows
1198        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1199        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1200        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1201                                                                                                                                                          */
1202   total_counts = n_ISForFaces+n_ISForEdges;
1203   total_counts *= max_constraints;
1204   n_vertices = 0;
1205   if (ISForVertices) {
1206     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1207   }
1208   total_counts += n_vertices;
1209   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1210   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1211   total_counts = 0;
1212   max_size_of_constraint = 0;
1213   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1214     if (i<n_ISForEdges) {
1215       used_IS = &ISForEdges[i];
1216     } else {
1217       used_IS = &ISForFaces[i-n_ISForEdges];
1218     }
1219     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1220     total_counts += j;
1221     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1222   }
1223   total_counts *= max_constraints;
1224   total_counts += n_vertices;
1225   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1226   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1227   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1228   /* local to boundary numbering */
1229   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1230   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1231   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1232   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1233   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1234   /* get local part of global near null space vectors */
1235   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1236   for (k=0;k<nnsp_size;k++) {
1237     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1238     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240   }
1241 
1242   /* whether or not to skip lapack calls */
1243   skip_lapack = PETSC_TRUE;
1244   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1245 
1246   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1247   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1248     PetscScalar temp_work;
1249 #if defined(PETSC_MISSING_LAPACK_GESVD)
1250     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1251     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1252     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1253     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_COMPLEX)
1255     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1256 #endif
1257     /* now we evaluate the optimal workspace using query with lwork=-1 */
1258     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1259     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1260     lwork = -1;
1261     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1262 #if !defined(PETSC_USE_COMPLEX)
1263     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1264 #else
1265     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1266 #endif
1267     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1268     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1269 #else /* on missing GESVD */
1270     /* SVD */
1271     PetscInt max_n,min_n;
1272     max_n = max_size_of_constraint;
1273     min_n = max_constraints;
1274     if (max_size_of_constraint < max_constraints) {
1275       min_n = max_size_of_constraint;
1276       max_n = max_constraints;
1277     }
1278     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1279 #if defined(PETSC_USE_COMPLEX)
1280     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1281 #endif
1282     /* now we evaluate the optimal workspace using query with lwork=-1 */
1283     lwork = -1;
1284     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1285     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1286     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1287     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1288 #if !defined(PETSC_USE_COMPLEX)
1289     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1290 #else
1291     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1292 #endif
1293     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1294     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1295 #endif /* on missing GESVD */
1296     /* Allocate optimal workspace */
1297     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1298     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1299   }
1300   /* Now we can loop on constraining sets */
1301   total_counts = 0;
1302   temp_indices[0] = 0;
1303   /* vertices */
1304   if (ISForVertices) {
1305     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1306     if (nnsp_has_cnst) { /* consider all vertices */
1307       for (i=0;i<n_vertices;i++) {
1308         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1309         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1310         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1311         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1312         total_counts++;
1313       }
1314     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1315       PetscBool used_vertex;
1316       for (i=0;i<n_vertices;i++) {
1317         used_vertex = PETSC_FALSE;
1318         k = 0;
1319         while (!used_vertex && k<nnsp_size) {
1320           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1321           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1322             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1323             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1324             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1325             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1326             total_counts++;
1327             used_vertex = PETSC_TRUE;
1328           }
1329           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1330           k++;
1331         }
1332       }
1333     }
1334     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1335     n_vertices = total_counts;
1336   }
1337 
1338   /* edges and faces */
1339   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1340     if (i<n_ISForEdges) {
1341       used_IS = &ISForEdges[i];
1342       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1343     } else {
1344       used_IS = &ISForFaces[i-n_ISForEdges];
1345       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1346     }
1347     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1348     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1349     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1350     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1351     /* change of basis should not be performed on local periodic nodes */
1352     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1353     if (nnsp_has_cnst) {
1354       PetscScalar quad_value;
1355       temp_constraints++;
1356       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1357       for (j=0;j<size_of_constraint;j++) {
1358         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1359         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1360         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1361       }
1362       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1363       if (boolforchange) {
1364         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1365       }
1366       total_counts++;
1367     }
1368     for (k=0;k<nnsp_size;k++) {
1369       PetscReal real_value;
1370       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1371       for (j=0;j<size_of_constraint;j++) {
1372         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1373         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1374         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1375       }
1376       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1377       /* check if array is null on the connected component */
1378       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1379       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1380       if (real_value > 0.0) { /* keep indices and values */
1381         temp_constraints++;
1382         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1383         if (boolforchange) {
1384           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1385         }
1386         total_counts++;
1387       }
1388     }
1389     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1390     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1391     if (!pcbddc->use_nnsp_true) {
1392       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1393 
1394 #if defined(PETSC_MISSING_LAPACK_GESVD)
1395       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1396          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1397          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1398             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1399             from that computed using LAPACKgesvd
1400          -> This is due to a different computation of eigenvectors in LAPACKheev
1401          -> The quality of the POD-computed basis will be the same */
1402       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1403       /* Store upper triangular part of correlation matrix */
1404       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1405       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1406       for (j=0;j<temp_constraints;j++) {
1407         for (k=0;k<j+1;k++) {
1408           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1409         }
1410       }
1411       /* compute eigenvalues and eigenvectors of correlation matrix */
1412       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1413       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1414 #if !defined(PETSC_USE_COMPLEX)
1415       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1416 #else
1417       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1418 #endif
1419       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1420       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1421       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1422       j=0;
1423       while (j < temp_constraints && singular_vals[j] < tol) j++;
1424       total_counts=total_counts-j;
1425       /* scale and copy POD basis into used quadrature memory */
1426       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1427       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1428       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1429       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1430       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1431       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1432       if (j<temp_constraints) {
1433         PetscInt ii;
1434         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1435         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1436         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1437         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1438         for (k=0;k<temp_constraints-j;k++) {
1439           for (ii=0;ii<size_of_constraint;ii++) {
1440             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1441           }
1442         }
1443       }
1444 #else  /* on missing GESVD */
1445       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1446       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1447       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1448       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1449 #if !defined(PETSC_USE_COMPLEX)
1450       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1451 #else
1452       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1453 #endif
1454       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1455       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1456       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1457       k = temp_constraints;
1458       if (k > size_of_constraint) k = size_of_constraint;
1459       j = 0;
1460       while (j < k && singular_vals[k-j-1] < tol) j++;
1461       total_counts = total_counts-temp_constraints+k-j;
1462 #endif /* on missing GESVD */
1463     }
1464   }
1465   /* free index sets of faces, edges and vertices */
1466   for (i=0;i<n_ISForFaces;i++) {
1467     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1470   for (i=0;i<n_ISForEdges;i++) {
1471     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1472   }
1473   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1474   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1475 
1476   /* free workspace */
1477   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1478     ierr = PetscFree(work);CHKERRQ(ierr);
1479 #if defined(PETSC_USE_COMPLEX)
1480     ierr = PetscFree(rwork);CHKERRQ(ierr);
1481 #endif
1482     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1483 #if defined(PETSC_MISSING_LAPACK_GESVD)
1484     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1485     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1486 #endif
1487   }
1488   for (k=0;k<nnsp_size;k++) {
1489     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1490   }
1491   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1492 
1493   /* set quantities in pcbddc data structure */
1494   /* n_vertices defines the number of subdomain corners in the primal space */
1495   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1496   pcbddc->local_primal_size = total_counts;
1497   pcbddc->n_vertices = n_vertices;
1498   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1499 
1500   /* Create constraint matrix */
1501   /* The constraint matrix is used to compute the l2g map of primal dofs */
1502   /* so we need to set it up properly either with or without change of basis */
1503   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1504   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1505   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1506   /* array to compute a local numbering of constraints : vertices first then constraints */
1507   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1508   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1509   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1510   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1511   /* auxiliary stuff for basis change */
1512   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1513   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1514 
1515   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1516   total_primal_vertices=0;
1517   for (i=0;i<pcbddc->local_primal_size;i++) {
1518     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1519     if (size_of_constraint == 1) {
1520       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1521       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1522       aux_primal_minloc[total_primal_vertices]=0;
1523       total_primal_vertices++;
1524     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1525       PetscInt min_loc,min_index;
1526       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1527       /* find first untouched local node */
1528       k = 0;
1529       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1530       min_index = global_indices[k];
1531       min_loc = k;
1532       /* search the minimum among global nodes already untouched on the cc */
1533       for (k=1;k<size_of_constraint;k++) {
1534         /* there can be more than one constraint on a single connected component */
1535         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1536           min_index = global_indices[k];
1537           min_loc = k;
1538         }
1539       }
1540       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1541       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1542       aux_primal_minloc[total_primal_vertices]=min_loc;
1543       total_primal_vertices++;
1544     }
1545   }
1546   /* free workspace */
1547   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1548   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1549   /* permute indices in order to have a sorted set of vertices */
1550   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1551 
1552   /* nonzero structure of constraint matrix */
1553   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1554   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1555   j=total_primal_vertices;
1556   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1557     if (!PetscBTLookup(change_basis,i)) {
1558       nnz[j]=temp_indices[i+1]-temp_indices[i];
1559       j++;
1560     }
1561   }
1562   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1563   ierr = PetscFree(nnz);CHKERRQ(ierr);
1564   /* set values in constraint matrix */
1565   for (i=0;i<total_primal_vertices;i++) {
1566     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1567   }
1568   total_counts = total_primal_vertices;
1569   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1570     if (!PetscBTLookup(change_basis,i)) {
1571       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1572       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1573       total_counts++;
1574     }
1575   }
1576   /* assembling */
1577   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579   /*
1580   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1581   */
1582   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1583   if (pcbddc->use_change_of_basis) {
1584     PetscBool qr_needed = PETSC_FALSE;
1585     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1586     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1587     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1588     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1589     /* work arrays */
1590     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1591     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1592     /* nonzeros per row */
1593     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1594       if (PetscBTLookup(change_basis,i)) {
1595         qr_needed = PETSC_TRUE;
1596         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1597         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1598       }
1599     }
1600     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1601     ierr = PetscFree(nnz);CHKERRQ(ierr);
1602     /* Set initial identity in the matrix */
1603     for (i=0;i<pcis->n_B;i++) {
1604       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606 
1607     /* Now we loop on the constraints which need a change of basis */
1608     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1609     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1610     /* Change of basis matrix T computed via QR decomposition of constraints */
1611     if (qr_needed) {
1612       /* dual and primal dofs on a single cc */
1613       PetscInt     dual_dofs,primal_dofs;
1614       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1615       PetscInt     primal_counter;
1616       /* working stuff for GEQRF */
1617       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1618       PetscBLASInt lqr_work;
1619       /* working stuff for UNGQR */
1620       PetscScalar  *gqr_work,lgqr_work_t;
1621       PetscBLASInt lgqr_work;
1622       /* working stuff for TRTRS */
1623       PetscScalar  *trs_rhs;
1624       PetscBLASInt Blas_NRHS;
1625       /* pointers for values insertion into change of basis matrix */
1626       PetscInt     *start_rows,*start_cols;
1627       PetscScalar  *start_vals;
1628       /* working stuff for values insertion */
1629       PetscBT      is_primal;
1630 
1631       /* space to store Q */
1632       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1633       /* first we issue queries for optimal work */
1634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1635       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1637       lqr_work = -1;
1638       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1639       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1640       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1641       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1642       lgqr_work = -1;
1643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1644       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1645       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1646       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1647       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1648       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1649       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1650       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1651       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1652       /* array to store scaling factors for reflectors */
1653       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1654       /* array to store rhs and solution of triangular solver */
1655       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1656       /* array to store whether a node is primal or not */
1657       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1658       for (i=0;i<total_primal_vertices;i++) {
1659         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1660       }
1661 
1662       /* allocating workspace for check */
1663       if (pcbddc->dbg_flag) {
1664         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1665         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1666         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1667       }
1668 
1669       /* loop on constraints and see whether or not they need a change of basis */
1670       /* -> using implicit ordering contained in temp_indices data */
1671       total_counts = pcbddc->n_vertices;
1672       primal_counter = total_counts;
1673       while (total_counts<pcbddc->local_primal_size) {
1674         primal_dofs = 1;
1675         if (PetscBTLookup(change_basis,total_counts)) {
1676           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1677           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1678             primal_dofs++;
1679           }
1680           /* get constraint info */
1681           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1682           dual_dofs = size_of_constraint-primal_dofs;
1683 
1684           /* copy quadrature constraints for change of basis check */
1685           if (pcbddc->dbg_flag) {
1686             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1687             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1688           }
1689 
1690           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1691           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1692 
1693           /* compute QR decomposition of constraints */
1694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1695           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1697           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1698           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1699           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1700           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1701 
1702           /* explictly compute R^-T */
1703           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1704           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1705           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1706           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1709           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1710           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1711           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1712           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1713 
1714           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1717           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1719           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1720           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1721           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1722           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1723 
1724           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1725              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1726              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1727           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1728           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1729           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1730           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1731           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1732           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1733           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1734           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1735           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1736           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1737 
1738           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1739           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1740           /* insert cols for primal dofs */
1741           for (j=0;j<primal_dofs;j++) {
1742             start_vals = &qr_basis[j*size_of_constraint];
1743             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1744             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1745           }
1746           /* insert cols for dual dofs */
1747           for (j=0,k=0;j<dual_dofs;k++) {
1748             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1749               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1750               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1751               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1752               j++;
1753             }
1754           }
1755 
1756           /* check change of basis */
1757           if (pcbddc->dbg_flag) {
1758             PetscInt   ii,jj;
1759             PetscBool valid_qr=PETSC_TRUE;
1760             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1764             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1765             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1766             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1767             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1768             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1769             for (jj=0;jj<size_of_constraint;jj++) {
1770               for (ii=0;ii<primal_dofs;ii++) {
1771                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1772                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1773               }
1774             }
1775             if (!valid_qr) {
1776               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1777               for (jj=0;jj<size_of_constraint;jj++) {
1778                 for (ii=0;ii<primal_dofs;ii++) {
1779                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1780                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1781                   }
1782                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1783                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1784                   }
1785                 }
1786               }
1787             } else {
1788               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1789             }
1790           }
1791           /* increment primal counter */
1792           primal_counter += primal_dofs;
1793         } else {
1794           if (pcbddc->dbg_flag) {
1795             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1796           }
1797         }
1798         /* increment constraint counter total_counts */
1799         total_counts += primal_dofs;
1800       }
1801       if (pcbddc->dbg_flag) {
1802         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1803         ierr = PetscFree(work);CHKERRQ(ierr);
1804       }
1805       /* free workspace */
1806       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1807       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1808       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1809       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1810       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1811       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1812     }
1813     /* assembling */
1814     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1815     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1816     /*
1817     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1818     */
1819   }
1820   /* free workspace */
1821   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1822   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1823   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1824   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1825   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1826   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1827   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1828   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1834 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1835 {
1836   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1837   PC_IS       *pcis = (PC_IS*)pc->data;
1838   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1839   PetscInt    bs,ierr,i,vertex_size;
1840   PetscViewer viewer=pcbddc->dbg_viewer;
1841 
1842   PetscFunctionBegin;
1843   /* Init local Graph struct */
1844   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1845 
1846   /* Check validity of the csr graph passed in by the user */
1847   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1848     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1849   }
1850   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1851   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1852     Mat mat_adj;
1853     const PetscInt *xadj,*adjncy;
1854     PetscBool flg_row=PETSC_TRUE;
1855 
1856     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1857     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1858     if (!flg_row) {
1859       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1860     }
1861     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1862     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1863     if (!flg_row) {
1864       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1865     }
1866     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1867   }
1868 
1869   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1870   vertex_size = 1;
1871   if (!pcbddc->n_ISForDofs) {
1872     IS *custom_ISForDofs;
1873 
1874     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1875     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1876     for (i=0;i<bs;i++) {
1877       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1878     }
1879     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1880     /* remove my references to IS objects */
1881     for (i=0;i<bs;i++) {
1882       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1883     }
1884     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1885   } else { /* mat block size as vertex size (used for elasticity) */
1886     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1887   }
1888 
1889   /* Setup of Graph */
1890   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1891 
1892   /* Graph's connected components analysis */
1893   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1894 
1895   /* print some info to stdout */
1896   if (pcbddc->dbg_flag) {
1897     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1904 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1905 {
1906   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1907   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   n = 0;
1912   vertices = 0;
1913   if (pcbddc->ConstraintMatrix) {
1914     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1915     for (i=0;i<local_primal_size;i++) {
1916       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1917       if (size_of_constraint == 1) n++;
1918       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1919     }
1920     if (vertices_idx) {
1921       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1922       n = 0;
1923       for (i=0;i<local_primal_size;i++) {
1924         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1925         if (size_of_constraint == 1) {
1926           vertices[n++]=row_cmat_indices[0];
1927         }
1928         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1929       }
1930     }
1931   }
1932   *n_vertices = n;
1933   if (vertices_idx) *vertices_idx = vertices;
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1939 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1940 {
1941   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1942   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1943   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1944   PetscBT        touched;
1945   PetscErrorCode ierr;
1946 
1947     /* This function assumes that the number of local constraints per connected component
1948        is not greater than the number of nodes defined for the connected component
1949        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1950   PetscFunctionBegin;
1951   n = 0;
1952   constraints_index = 0;
1953   if (pcbddc->ConstraintMatrix) {
1954     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1955     max_size_of_constraint = 0;
1956     for (i=0;i<local_primal_size;i++) {
1957       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1958       if (size_of_constraint > 1) {
1959         n++;
1960       }
1961       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1962       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1963     }
1964     if (constraints_idx) {
1965       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1966       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1967       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1968       n = 0;
1969       for (i=0;i<local_primal_size;i++) {
1970         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1971         if (size_of_constraint > 1) {
1972           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1973           /* find first untouched local node */
1974           j = 0;
1975           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1976           min_index = row_cmat_global_indices[j];
1977           min_loc = j;
1978           /* search the minimum among nodes not yet touched on the connected component
1979              since there can be more than one constraint on a single cc */
1980           for (j=1;j<size_of_constraint;j++) {
1981             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1982               min_index = row_cmat_global_indices[j];
1983               min_loc = j;
1984             }
1985           }
1986           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1987           constraints_index[n++] = row_cmat_indices[min_loc];
1988         }
1989         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1990       }
1991       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1992       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1993     }
1994   }
1995   *n_constraints = n;
1996   if (constraints_idx) *constraints_idx = constraints_index;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 /* the next two functions has been adapted from pcis.c */
2001 #undef __FUNCT__
2002 #define __FUNCT__ "PCBDDCApplySchur"
2003 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2004 {
2005   PetscErrorCode ierr;
2006   PC_IS          *pcis = (PC_IS*)(pc->data);
2007 
2008   PetscFunctionBegin;
2009   if (!vec2_B) { vec2_B = v; }
2010   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2011   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2012   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2013   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2014   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2015   PetscFunctionReturn(0);
2016 }
2017 
2018 #undef __FUNCT__
2019 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2020 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2021 {
2022   PetscErrorCode ierr;
2023   PC_IS          *pcis = (PC_IS*)(pc->data);
2024 
2025   PetscFunctionBegin;
2026   if (!vec2_B) { vec2_B = v; }
2027   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2028   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2029   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2030   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2031   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PCBDDCSubsetNumbering"
2037 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2038 {
2039   Vec            local_vec,global_vec;
2040   IS             seqis,paris;
2041   VecScatter     scatter_ctx;
2042   PetscScalar    *array;
2043   PetscInt       *temp_global_dofs;
2044   PetscScalar    globalsum;
2045   PetscInt       i,j,s;
2046   PetscInt       nlocals,first_index,old_index,max_local;
2047   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2048   PetscMPIInt    *dof_sizes,*dof_displs;
2049   PetscBool      first_found;
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   /* mpi buffers */
2054   MPI_Comm_size(comm,&size_prec_comm);
2055   MPI_Comm_rank(comm,&rank_prec_comm);
2056   j = ( !rank_prec_comm ? size_prec_comm : 0);
2057   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2058   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2059   /* get maximum size of subset */
2060   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2061   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2062   max_local = 0;
2063   if (n_local_dofs) {
2064     max_local = temp_global_dofs[0];
2065     for (i=1;i<n_local_dofs;i++) {
2066       if (max_local < temp_global_dofs[i] ) {
2067         max_local = temp_global_dofs[i];
2068       }
2069     }
2070   }
2071   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2072   max_global++;
2073   max_local = 0;
2074   if (n_local_dofs) {
2075     max_local = local_dofs[0];
2076     for (i=1;i<n_local_dofs;i++) {
2077       if (max_local < local_dofs[i] ) {
2078         max_local = local_dofs[i];
2079       }
2080     }
2081   }
2082   max_local++;
2083   /* allocate workspace */
2084   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2085   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2086   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2087   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2088   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2089   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2090   /* create scatter */
2091   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2092   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2093   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2094   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2095   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2096   /* init array */
2097   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2098   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2099   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2100   if (local_dofs_mult) {
2101     for (i=0;i<n_local_dofs;i++) {
2102       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2103     }
2104   } else {
2105     for (i=0;i<n_local_dofs;i++) {
2106       array[local_dofs[i]]=1.0;
2107     }
2108   }
2109   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2110   /* scatter into global vec and get total number of global dofs */
2111   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2112   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2113   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2114   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2115   /* Fill global_vec with cumulative function for global numbering */
2116   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2117   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2118   nlocals = 0;
2119   first_index = -1;
2120   first_found = PETSC_FALSE;
2121   for (i=0;i<s;i++) {
2122     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2123       first_found = PETSC_TRUE;
2124       first_index = i;
2125     }
2126     nlocals += (PetscInt)PetscRealPart(array[i]);
2127   }
2128   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2129   if (!rank_prec_comm) {
2130     dof_displs[0]=0;
2131     for (i=1;i<size_prec_comm;i++) {
2132       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2133     }
2134   }
2135   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2136   if (first_found) {
2137     array[first_index] += (PetscScalar)nlocals;
2138     old_index = first_index;
2139     for (i=first_index+1;i<s;i++) {
2140       if (PetscRealPart(array[i]) > 0.0) {
2141         array[i] += array[old_index];
2142         old_index = i;
2143       }
2144     }
2145   }
2146   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2147   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2148   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2149   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2150   /* get global ordering of local dofs */
2151   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2152   if (local_dofs_mult) {
2153     for (i=0;i<n_local_dofs;i++) {
2154       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2155     }
2156   } else {
2157     for (i=0;i<n_local_dofs;i++) {
2158       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2159     }
2160   }
2161   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2162   /* free workspace */
2163   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2164   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2165   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2166   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2167   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2168   /* return pointer to global ordering of local dofs */
2169   *global_numbering_subset = temp_global_dofs;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2175 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2176 {
2177   PetscInt       i,j;
2178   PetscScalar    *alphas;
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   /* this implements stabilized Gram-Schmidt */
2183   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2184   for (i=0;i<n;i++) {
2185     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2186     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2187     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2188   }
2189   ierr = PetscFree(alphas);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 /* BDDC requires metis 5.0.1 for multilevel */
2194 #if defined(PETSC_HAVE_METIS)
2195 #include "metis.h"
2196 #define MetisInt    idx_t
2197 #define MetisScalar real_t
2198 #endif
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2202 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2203 {
2204   PC_BDDC   *pcbddc   = (PC_BDDC*)pc->data;
2205   PC_IS     *pcis     = (PC_IS*)pc->data;
2206   MPI_Comm  prec_comm;
2207   MPI_Comm  coarse_comm;
2208   Mat coarse_mat;
2209 
2210   PetscInt  coarse_size;
2211 
2212   MatNullSpace CoarseNullSpace;
2213 
2214   /* common to all choiches */
2215   PetscScalar *temp_coarse_mat_vals;
2216   PetscScalar *ins_coarse_mat_vals;
2217   PetscInt    *ins_local_primal_indices;
2218   PetscMPIInt *localsizes2,*localdispl2;
2219   PetscMPIInt size_prec_comm;
2220   PetscMPIInt rank_prec_comm;
2221   PetscMPIInt active_rank=MPI_PROC_NULL;
2222   PetscMPIInt master_proc=0;
2223   PetscInt    ins_local_primal_size;
2224   /* specific to MULTILEVEL_BDDC */
2225   PetscMPIInt *ranks_recv=0;
2226   PetscMPIInt count_recv=0;
2227   PetscMPIInt rank_coarse_proc_send_to=-1;
2228   PetscMPIInt coarse_color = MPI_UNDEFINED;
2229   ISLocalToGlobalMapping coarse_ISLG;
2230   /* some other variables */
2231   PetscErrorCode ierr;
2232   MatType coarse_mat_type;
2233   PCType  coarse_pc_type;
2234   KSPType coarse_ksp_type;
2235   PC pc_temp;
2236   PetscInt i,j,k;
2237   PetscInt max_it_coarse_ksp=1;  /* don't increase this value */
2238   /* verbose output viewer */
2239   PetscViewer viewer=pcbddc->dbg_viewer;
2240   PetscInt    dbg_flag=pcbddc->dbg_flag;
2241 
2242   PetscInt      offset,offset2;
2243   PetscMPIInt   im_active,active_procs;
2244   PetscInt      *dnz,*onz;
2245 
2246   PetscBool     setsym,issym=PETSC_FALSE;
2247 
2248   PetscInt      *replicated_local_primal_indices=0,*local_primal_indices=0,*local_primal_displacements=0,*local_primal_sizes=0;
2249   PetscInt      replicated_primal_size=0;
2250 
2251   PetscFunctionBegin;
2252   ierr = PetscObjectGetComm((PetscObject)pc,&prec_comm);CHKERRQ(ierr);
2253   ins_local_primal_indices = 0;
2254   ins_coarse_mat_vals      = 0;
2255   localsizes2              = 0;
2256   localdispl2              = 0;
2257   temp_coarse_mat_vals     = 0;
2258   coarse_ISLG              = 0;
2259 
2260   ierr = MPI_Comm_size(prec_comm,&size_prec_comm);CHKERRQ(ierr);
2261   ierr = MPI_Comm_rank(prec_comm,&rank_prec_comm);CHKERRQ(ierr);
2262   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2263 
2264   if (pcbddc->coarse_problem_type == SEQUENTIAL_BDDC || pcbddc->coarse_problem_type == REPLICATED_BDDC) {
2265     pcbddc->coarse_problem_type = PARALLEL_BDDC;
2266     pcbddc->coarse_communications_type = SCATTERS_BDDC;
2267   }
2268 
2269   /* Assign global numbering to coarse dofs */
2270   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2271 
2272   im_active = 0;
2273   if (pcis->n) im_active = 1;
2274   ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,prec_comm);CHKERRQ(ierr);
2275 
2276   /* adapt coarse problem type */
2277 #if defined(PETSC_HAVE_METIS)
2278   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2279     if (pcbddc->current_level < pcbddc->max_levels) {
2280       if ( (active_procs/pcbddc->coarsening_ratio) < 2 ) {
2281         if (dbg_flag) {
2282           ierr = PetscViewerASCIIPrintf(viewer,"Not enough active processes on level %d (active %d,ratio %d). Parallel direct solve for coarse problem\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2283          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2284         }
2285         pcbddc->coarse_problem_type = PARALLEL_BDDC;
2286       }
2287     } else {
2288       if (dbg_flag) {
2289         ierr = PetscViewerASCIIPrintf(viewer,"Max number of levels reached. Using parallel direct solve for coarse problem\n",pcbddc->max_levels,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2290         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2291       }
2292       pcbddc->coarse_problem_type = PARALLEL_BDDC;
2293     }
2294   }
2295 #else
2296   pcbddc->coarse_problem_type = PARALLEL_BDDC;
2297 #endif
2298 
2299   /* OLD version from here */
2300 
2301   /* Construct needed data structures for message passing */
2302   j = 0;
2303   if (rank_prec_comm == 0 || pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2304     j = size_prec_comm;
2305   }
2306   ierr = PetscMalloc(j*sizeof(*local_primal_sizes),&local_primal_sizes);CHKERRQ(ierr);
2307   ierr = PetscMalloc(j*sizeof(*local_primal_displacements),&local_primal_displacements);CHKERRQ(ierr);
2308   /* Gather local_primal_size information for all processes  */
2309   if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2310     ierr = MPI_Allgather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,prec_comm);CHKERRQ(ierr);
2311   } else {
2312     ierr = MPI_Gather(&pcbddc->local_primal_size,1,MPIU_INT,&local_primal_sizes[0],1,MPIU_INT,0,prec_comm);CHKERRQ(ierr);
2313   }
2314   replicated_primal_size = 0;
2315   for (i=0; i<j; i++) {
2316     local_primal_displacements[i] = replicated_primal_size ;
2317     replicated_primal_size += local_primal_sizes[i];
2318   }
2319 
2320   switch(pcbddc->coarse_problem_type){
2321 
2322     case(MULTILEVEL_BDDC):   /* we define a coarse mesh where subdomains are elements */
2323     {
2324 #if defined(PETSC_HAVE_METIS)
2325       /* we need additional variables */
2326       MetisInt    n_subdomains,n_parts,objval,ncon,faces_nvtxs;
2327       MetisInt    *metis_coarse_subdivision;
2328       MetisInt    options[METIS_NOPTIONS];
2329       PetscMPIInt size_coarse_comm,rank_coarse_comm;
2330       PetscMPIInt procs_jumps_coarse_comm;
2331       PetscMPIInt *coarse_subdivision;
2332       PetscMPIInt *total_count_recv;
2333       PetscMPIInt *total_ranks_recv;
2334       PetscMPIInt *displacements_recv;
2335       PetscMPIInt *my_faces_connectivity;
2336       PetscMPIInt *petsc_faces_adjncy;
2337       MetisInt    *faces_adjncy;
2338       MetisInt    *faces_xadj;
2339       PetscMPIInt *number_of_faces;
2340       PetscMPIInt *faces_displacements;
2341       PetscInt    *array_int;
2342       PetscMPIInt my_faces=0;
2343       PetscMPIInt total_faces=0;
2344       PetscInt    ranks_stretching_ratio;
2345 
2346       /* define some quantities */
2347       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2348       coarse_mat_type = MATIS;
2349       coarse_pc_type  = PCBDDC;
2350       coarse_ksp_type = KSPRICHARDSON;
2351 
2352       /* details of coarse decomposition */
2353       n_subdomains = active_procs;
2354       n_parts      = n_subdomains/pcbddc->coarsening_ratio;
2355       ranks_stretching_ratio = size_prec_comm/active_procs;
2356       procs_jumps_coarse_comm = pcbddc->coarsening_ratio*ranks_stretching_ratio;
2357 
2358 #if 0
2359       PetscMPIInt *old_ranks;
2360       PetscInt    *new_ranks,*jj,*ii;
2361       MatPartitioning mat_part;
2362       IS coarse_new_decomposition,is_numbering;
2363       PetscViewer viewer_test;
2364       MPI_Comm    test_coarse_comm;
2365       PetscMPIInt test_coarse_color;
2366       Mat         mat_adj;
2367       /* Create new communicator for coarse problem splitting the old one */
2368       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2369          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2370       test_coarse_color = ( im_active ? 0 : MPI_UNDEFINED );
2371       test_coarse_comm = MPI_COMM_NULL;
2372       ierr = MPI_Comm_split(prec_comm,test_coarse_color,rank_prec_comm,&test_coarse_comm);CHKERRQ(ierr);
2373       if (im_active) {
2374         ierr = PetscMalloc(n_subdomains*sizeof(PetscMPIInt),&old_ranks);
2375         ierr = PetscMalloc(size_prec_comm*sizeof(PetscInt),&new_ranks);
2376         ierr = MPI_Comm_rank(test_coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2377         ierr = MPI_Comm_size(test_coarse_comm,&j);CHKERRQ(ierr);
2378         ierr = MPI_Allgather(&rank_prec_comm,1,MPIU_INT,old_ranks,1,MPIU_INT,test_coarse_comm);CHKERRQ(ierr);
2379         for (i=0; i<size_prec_comm; i++) new_ranks[i] = -1;
2380         for (i=0; i<n_subdomains; i++) new_ranks[old_ranks[i]] = i;
2381         ierr = PetscViewerASCIIOpen(test_coarse_comm,"test_mat_part.out",&viewer_test);CHKERRQ(ierr);
2382         k = pcis->n_neigh-1;
2383         ierr = PetscMalloc(2*sizeof(PetscInt),&ii);
2384         ii[0]=0;
2385         ii[1]=k;
2386         ierr = PetscMalloc(k*sizeof(PetscInt),&jj);
2387         for (i=0; i<k; i++) jj[i]=new_ranks[pcis->neigh[i+1]];
2388         ierr = PetscSortInt(k,jj);CHKERRQ(ierr);
2389         ierr = MatCreateMPIAdj(test_coarse_comm,1,n_subdomains,ii,jj,NULL,&mat_adj);CHKERRQ(ierr);
2390         ierr = MatView(mat_adj,viewer_test);CHKERRQ(ierr);
2391         ierr = MatPartitioningCreate(test_coarse_comm,&mat_part);CHKERRQ(ierr);
2392         ierr = MatPartitioningSetAdjacency(mat_part,mat_adj);CHKERRQ(ierr);
2393         ierr = MatPartitioningSetFromOptions(mat_part);CHKERRQ(ierr);
2394         printf("Setting Nparts %d\n",n_parts);
2395         ierr = MatPartitioningSetNParts(mat_part,n_parts);CHKERRQ(ierr);
2396         ierr = MatPartitioningView(mat_part,viewer_test);CHKERRQ(ierr);
2397         ierr = MatPartitioningApply(mat_part,&coarse_new_decomposition);CHKERRQ(ierr);
2398         ierr = ISView(coarse_new_decomposition,viewer_test);CHKERRQ(ierr);
2399         ierr = ISPartitioningToNumbering(coarse_new_decomposition,&is_numbering);CHKERRQ(ierr);
2400         ierr = ISView(is_numbering,viewer_test);CHKERRQ(ierr);
2401         ierr = PetscViewerDestroy(&viewer_test);CHKERRQ(ierr);
2402         ierr = ISDestroy(&coarse_new_decomposition);CHKERRQ(ierr);
2403         ierr = ISDestroy(&is_numbering);CHKERRQ(ierr);
2404         ierr = MatPartitioningDestroy(&mat_part);CHKERRQ(ierr);
2405         ierr = PetscFree(old_ranks);CHKERRQ(ierr);
2406         ierr = PetscFree(new_ranks);CHKERRQ(ierr);
2407         ierr = MPI_Comm_free(&test_coarse_comm);CHKERRQ(ierr);
2408       }
2409 #endif
2410 
2411       /* build CSR graph of subdomains' connectivity */
2412       ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&array_int);CHKERRQ(ierr);
2413       ierr = PetscMemzero(array_int,pcis->n*sizeof(PetscInt));CHKERRQ(ierr);
2414       for (i=1;i<pcis->n_neigh;i++){/* i=1 so I don't count myself -> faces nodes counts to 1 */
2415         for (j=0;j<pcis->n_shared[i];j++){
2416           array_int[ pcis->shared[i][j] ]+=1;
2417         }
2418       }
2419       for (i=1;i<pcis->n_neigh;i++){
2420         for (j=0;j<pcis->n_shared[i];j++){
2421           if (array_int[ pcis->shared[i][j] ] > 0 ){
2422             my_faces++;
2423             break;
2424           }
2425         }
2426       }
2427 
2428       ierr = MPI_Reduce(&my_faces,&total_faces,1,MPIU_INT,MPI_SUM,master_proc,prec_comm);CHKERRQ(ierr);
2429       ierr = PetscMalloc (my_faces*sizeof(PetscInt),&my_faces_connectivity);CHKERRQ(ierr);
2430       my_faces=0;
2431       for (i=1;i<pcis->n_neigh;i++){
2432         for (j=0;j<pcis->n_shared[i];j++){
2433           if (array_int[ pcis->shared[i][j] ] > 0 ){
2434             my_faces_connectivity[my_faces]=pcis->neigh[i];
2435             my_faces++;
2436             break;
2437           }
2438         }
2439       }
2440       if (rank_prec_comm == master_proc) {
2441         ierr = PetscMalloc (total_faces*sizeof(PetscMPIInt),&petsc_faces_adjncy);CHKERRQ(ierr);
2442         ierr = PetscMalloc (size_prec_comm*sizeof(PetscMPIInt),&number_of_faces);CHKERRQ(ierr);
2443         ierr = PetscMalloc (total_faces*sizeof(MetisInt),&faces_adjncy);CHKERRQ(ierr);
2444         ierr = PetscMalloc ((n_subdomains+1)*sizeof(MetisInt),&faces_xadj);CHKERRQ(ierr);
2445         ierr = PetscMalloc ((size_prec_comm+1)*sizeof(PetscMPIInt),&faces_displacements);CHKERRQ(ierr);
2446       }
2447       ierr = MPI_Gather(&my_faces,1,MPIU_INT,&number_of_faces[0],1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2448       if (rank_prec_comm == master_proc) {
2449         faces_xadj[0]=0;
2450         faces_displacements[0]=0;
2451         j=0;
2452         for (i=1;i<size_prec_comm+1;i++) {
2453           faces_displacements[i]=faces_displacements[i-1]+number_of_faces[i-1];
2454           if (number_of_faces[i-1]) {
2455             j++;
2456             faces_xadj[j]=faces_xadj[j-1]+number_of_faces[i-1];
2457           }
2458         }
2459       }
2460       ierr = MPI_Gatherv(&my_faces_connectivity[0],my_faces,MPIU_INT,&petsc_faces_adjncy[0],number_of_faces,faces_displacements,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2461       ierr = PetscFree(my_faces_connectivity);CHKERRQ(ierr);
2462       ierr = PetscFree(array_int);CHKERRQ(ierr);
2463       if (rank_prec_comm == master_proc) {
2464         for (i=0;i<total_faces;i++) faces_adjncy[i]=(MetisInt)(petsc_faces_adjncy[i]/ranks_stretching_ratio); /* cast to MetisInt */
2465         ierr = PetscFree(faces_displacements);CHKERRQ(ierr);
2466         ierr = PetscFree(number_of_faces);CHKERRQ(ierr);
2467         ierr = PetscFree(petsc_faces_adjncy);CHKERRQ(ierr);
2468       }
2469 
2470       if ( rank_prec_comm == master_proc ) {
2471 
2472         PetscInt heuristic_for_metis=3;
2473 
2474         ncon=1;
2475         faces_nvtxs=n_subdomains;
2476         /* partition graoh induced by face connectivity */
2477         ierr = PetscMalloc (n_subdomains*sizeof(MetisInt),&metis_coarse_subdivision);CHKERRQ(ierr);
2478         ierr = METIS_SetDefaultOptions(options);
2479         /* we need a contiguous partition of the coarse mesh */
2480         options[METIS_OPTION_CONTIG]=1;
2481         options[METIS_OPTION_NITER]=30;
2482         if (pcbddc->coarsening_ratio > 1) {
2483           if (n_subdomains>n_parts*heuristic_for_metis) {
2484             options[METIS_OPTION_IPTYPE]=METIS_IPTYPE_EDGE;
2485             options[METIS_OPTION_OBJTYPE]=METIS_OBJTYPE_CUT;
2486             ierr = METIS_PartGraphKway(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2487             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphKway (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2488           } else {
2489             ierr = METIS_PartGraphRecursive(&faces_nvtxs,&ncon,faces_xadj,faces_adjncy,NULL,NULL,NULL,&n_parts,NULL,NULL,options,&objval,metis_coarse_subdivision);
2490             if (ierr != METIS_OK) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in METIS_PartGraphRecursive (metis error code %D) called from PCBDDCSetUpCoarseEnvironment\n",ierr);
2491           }
2492         } else {
2493           for (i=0;i<n_subdomains;i++) metis_coarse_subdivision[i]=i;
2494         }
2495         ierr = PetscFree(faces_xadj);CHKERRQ(ierr);
2496         ierr = PetscFree(faces_adjncy);CHKERRQ(ierr);
2497         ierr = PetscMalloc(size_prec_comm*sizeof(PetscMPIInt),&coarse_subdivision);CHKERRQ(ierr);
2498 
2499         /* copy/cast values avoiding possible type conflicts between PETSc, MPI and METIS */
2500         for (i=0;i<size_prec_comm;i++) coarse_subdivision[i]=MPI_PROC_NULL;
2501         for (i=0;i<n_subdomains;i++) coarse_subdivision[ranks_stretching_ratio*i]=(PetscInt)(metis_coarse_subdivision[i]);
2502         ierr = PetscFree(metis_coarse_subdivision);CHKERRQ(ierr);
2503       }
2504 
2505       /* Create new communicator for coarse problem splitting the old one */
2506       if ( !(rank_prec_comm%procs_jumps_coarse_comm) && rank_prec_comm < procs_jumps_coarse_comm*n_parts ){
2507         coarse_color=0;              /* for communicator splitting */
2508         active_rank=rank_prec_comm;  /* for insertion of matrix values */
2509       }
2510       /* procs with coarse_color = MPI_UNDEFINED will have coarse_comm = MPI_COMM_NULL (from mpi standards)
2511          key = rank_prec_comm -> keep same ordering of ranks from the old to the new communicator */
2512       ierr = MPI_Comm_split(prec_comm,coarse_color,rank_prec_comm,&coarse_comm);CHKERRQ(ierr);
2513 
2514       if ( coarse_color == 0 ) {
2515         ierr = MPI_Comm_size(coarse_comm,&size_coarse_comm);CHKERRQ(ierr);
2516         ierr = MPI_Comm_rank(coarse_comm,&rank_coarse_comm);CHKERRQ(ierr);
2517       } else {
2518         rank_coarse_comm = MPI_PROC_NULL;
2519       }
2520 
2521       /* master proc take care of arranging and distributing coarse information */
2522       if (rank_coarse_comm == master_proc) {
2523         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&displacements_recv);CHKERRQ(ierr);
2524         ierr = PetscMalloc (size_coarse_comm*sizeof(PetscMPIInt),&total_count_recv);CHKERRQ(ierr);
2525         ierr = PetscMalloc (n_subdomains*sizeof(PetscMPIInt),&total_ranks_recv);CHKERRQ(ierr);
2526         /* some initializations */
2527         displacements_recv[0]=0;
2528         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2529         /* count from how many processes the j-th process of the coarse decomposition will receive data */
2530         for (j=0;j<size_coarse_comm;j++) {
2531           for (i=0;i<size_prec_comm;i++) {
2532           if (coarse_subdivision[i]==j) total_count_recv[j]++;
2533           }
2534         }
2535         /* displacements needed for scatterv of total_ranks_recv */
2536       for (i=1; i<size_coarse_comm; i++) displacements_recv[i]=displacements_recv[i-1]+total_count_recv[i-1];
2537 
2538         /* Now fill properly total_ranks_recv -> each coarse process will receive the ranks (in prec_comm communicator) of its friend (sending) processes */
2539         ierr = PetscMemzero(total_count_recv,size_coarse_comm*sizeof(PetscMPIInt));CHKERRQ(ierr);
2540         for (j=0;j<size_coarse_comm;j++) {
2541           for (i=0;i<size_prec_comm;i++) {
2542             if (coarse_subdivision[i]==j) {
2543               total_ranks_recv[displacements_recv[j]+total_count_recv[j]]=i;
2544               total_count_recv[j]+=1;
2545             }
2546           }
2547         }
2548         /*for (j=0;j<size_coarse_comm;j++) {
2549           printf("process %d in new rank will receive from %d processes (original ranks follows)\n",j,total_count_recv[j]);
2550           for (i=0;i<total_count_recv[j];i++) {
2551             printf("%d ",total_ranks_recv[displacements_recv[j]+i]);
2552           }
2553           printf("\n");
2554         }*/
2555 
2556         /* identify new decomposition in terms of ranks in the old communicator */
2557         for (i=0;i<n_subdomains;i++) {
2558           coarse_subdivision[ranks_stretching_ratio*i]=coarse_subdivision[ranks_stretching_ratio*i]*procs_jumps_coarse_comm;
2559         }
2560         /*printf("coarse_subdivision in old end new ranks\n");
2561         for (i=0;i<size_prec_comm;i++)
2562           if (coarse_subdivision[i]!=MPI_PROC_NULL) {
2563             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]/procs_jumps_coarse_comm);
2564           } else {
2565             printf("%d=(%d %d), ",i,coarse_subdivision[i],coarse_subdivision[i]);
2566           }
2567         printf("\n");*/
2568       }
2569 
2570       /* Scatter new decomposition for send details */
2571       ierr = MPI_Scatter(&coarse_subdivision[0],1,MPIU_INT,&rank_coarse_proc_send_to,1,MPIU_INT,master_proc,prec_comm);CHKERRQ(ierr);
2572       /* Scatter receiving details to members of coarse decomposition */
2573       if ( coarse_color == 0) {
2574         ierr = MPI_Scatter(&total_count_recv[0],1,MPIU_INT,&count_recv,1,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2575         ierr = PetscMalloc (count_recv*sizeof(PetscMPIInt),&ranks_recv);CHKERRQ(ierr);
2576         ierr = MPI_Scatterv(&total_ranks_recv[0],total_count_recv,displacements_recv,MPIU_INT,&ranks_recv[0],count_recv,MPIU_INT,master_proc,coarse_comm);CHKERRQ(ierr);
2577       }
2578 
2579       /*printf("I will send my matrix data to proc  %d\n",rank_coarse_proc_send_to);
2580       if (coarse_color == 0) {
2581         printf("I will receive some matrix data from %d processes (ranks follows)\n",count_recv);
2582         for (i=0;i<count_recv;i++)
2583           printf("%d ",ranks_recv[i]);
2584         printf("\n");
2585       }*/
2586 
2587       if (rank_prec_comm == master_proc) {
2588         ierr = PetscFree(coarse_subdivision);CHKERRQ(ierr);
2589         ierr = PetscFree(total_count_recv);CHKERRQ(ierr);
2590         ierr = PetscFree(total_ranks_recv);CHKERRQ(ierr);
2591         ierr = PetscFree(displacements_recv);CHKERRQ(ierr);
2592       }
2593 #endif
2594       break;
2595     }
2596 
2597     case(PARALLEL_BDDC):
2598 
2599       pcbddc->coarse_communications_type = SCATTERS_BDDC;
2600       coarse_mat_type = MATAIJ;
2601       coarse_pc_type  = PCREDUNDANT;
2602       coarse_ksp_type  = KSPPREONLY;
2603       coarse_comm = prec_comm;
2604       active_rank = rank_prec_comm;
2605       break;
2606 
2607   }
2608 
2609   switch(pcbddc->coarse_communications_type){
2610 
2611     case(SCATTERS_BDDC):
2612       {
2613         if (pcbddc->coarse_problem_type==MULTILEVEL_BDDC) {
2614 
2615           IS coarse_IS;
2616 
2617           if(pcbddc->coarsening_ratio == 1) {
2618             ins_local_primal_size = pcbddc->local_primal_size;
2619             ins_local_primal_indices = local_primal_indices;
2620             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2621             /* nonzeros */
2622             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2623             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2624             for (i=0;i<ins_local_primal_size;i++) {
2625               dnz[i] = ins_local_primal_size;
2626             }
2627           } else {
2628             PetscMPIInt send_size;
2629             PetscMPIInt *send_buffer;
2630             PetscInt    *aux_ins_indices;
2631             PetscInt    ii,jj;
2632             MPI_Request *requests;
2633 
2634             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localdispl2);CHKERRQ(ierr);
2635             /* reusing local_primal_displacements and replicated_primal_size */
2636             ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
2637             ierr = PetscMalloc((count_recv+1)*sizeof(PetscMPIInt),&local_primal_displacements);CHKERRQ(ierr);
2638             replicated_primal_size = count_recv;
2639             j = 0;
2640             for (i=0;i<count_recv;i++) {
2641               local_primal_displacements[i] = j;
2642               j += local_primal_sizes[ranks_recv[i]];
2643             }
2644             local_primal_displacements[count_recv] = j;
2645             ierr = PetscMalloc(j*sizeof(PetscMPIInt),&replicated_local_primal_indices);CHKERRQ(ierr);
2646             /* allocate auxiliary space */
2647             ierr = PetscMalloc(count_recv*sizeof(PetscMPIInt),&localsizes2);CHKERRQ(ierr);
2648             ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&aux_ins_indices);CHKERRQ(ierr);
2649             ierr = PetscMemzero(aux_ins_indices,coarse_size*sizeof(PetscInt));CHKERRQ(ierr);
2650             /* allocate stuffs for message massing */
2651             ierr = PetscMalloc((count_recv+1)*sizeof(MPI_Request),&requests);CHKERRQ(ierr);
2652             for (i=0;i<count_recv+1;i++) { requests[i]=MPI_REQUEST_NULL; }
2653             /* send indices to be inserted */
2654             for (i=0;i<count_recv;i++) {
2655               send_size = local_primal_sizes[ranks_recv[i]];
2656               ierr = MPI_Irecv(&replicated_local_primal_indices[local_primal_displacements[i]],send_size,MPIU_INT,ranks_recv[i],999,prec_comm,&requests[i]);CHKERRQ(ierr);
2657             }
2658             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2659               send_size = pcbddc->local_primal_size;
2660               ierr = PetscMalloc(send_size*sizeof(PetscMPIInt),&send_buffer);CHKERRQ(ierr);
2661               for (i=0;i<send_size;i++) {
2662                 send_buffer[i]=(PetscMPIInt)local_primal_indices[i];
2663               }
2664               ierr = MPI_Isend(send_buffer,send_size,MPIU_INT,rank_coarse_proc_send_to,999,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2665             }
2666             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2667             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2668               ierr = PetscFree(send_buffer);CHKERRQ(ierr);
2669             }
2670             j = 0;
2671             for (i=0;i<count_recv;i++) {
2672               ii = local_primal_displacements[i+1]-local_primal_displacements[i];
2673               localsizes2[i] = ii*ii;
2674               localdispl2[i] = j;
2675               j += localsizes2[i];
2676               jj = local_primal_displacements[i];
2677               /* it counts the coarse subdomains sharing the coarse node */
2678               for (k=0;k<ii;k++) {
2679                 aux_ins_indices[replicated_local_primal_indices[jj+k]] += 1;
2680               }
2681             }
2682             /* temp_coarse_mat_vals used to store matrix values to be received */
2683             ierr = PetscMalloc(j*sizeof(PetscScalar),&temp_coarse_mat_vals);CHKERRQ(ierr);
2684             /* evaluate how many values I will insert in coarse mat */
2685             ins_local_primal_size = 0;
2686             for (i=0;i<coarse_size;i++) {
2687               if (aux_ins_indices[i]) {
2688                 ins_local_primal_size++;
2689               }
2690             }
2691             /* evaluate indices I will insert in coarse mat */
2692             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2693             j = 0;
2694             for(i=0;i<coarse_size;i++) {
2695               if(aux_ins_indices[i]) {
2696                 ins_local_primal_indices[j] = i;
2697                 j++;
2698               }
2699             }
2700             /* processes partecipating in coarse problem receive matrix data from their friends */
2701             for (i=0;i<count_recv;i++) {
2702               ierr = MPI_Irecv(&temp_coarse_mat_vals[localdispl2[i]],localsizes2[i],MPIU_SCALAR,ranks_recv[i],666,prec_comm,&requests[i]);CHKERRQ(ierr);
2703             }
2704             if (rank_coarse_proc_send_to != MPI_PROC_NULL ) {
2705               send_size = pcbddc->local_primal_size*pcbddc->local_primal_size;
2706               ierr = MPI_Isend(&coarse_submat_vals[0],send_size,MPIU_SCALAR,rank_coarse_proc_send_to,666,prec_comm,&requests[count_recv]);CHKERRQ(ierr);
2707             }
2708             ierr = MPI_Waitall(count_recv+1,requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2709             /* nonzeros */
2710             ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&dnz);CHKERRQ(ierr);
2711             ierr = PetscMemzero(dnz,ins_local_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2712             /* use aux_ins_indices to realize a global to local mapping */
2713             j=0;
2714             for(i=0;i<coarse_size;i++){
2715               if(aux_ins_indices[i]==0){
2716                 aux_ins_indices[i]=-1;
2717               } else {
2718                 aux_ins_indices[i]=j;
2719                 j++;
2720               }
2721             }
2722             for (i=0;i<count_recv;i++) {
2723               j = local_primal_sizes[ranks_recv[i]];
2724               for (k=0;k<j;k++) {
2725                 dnz[aux_ins_indices[replicated_local_primal_indices[local_primal_displacements[i]+k]]] += j;
2726               }
2727             }
2728             /* check */
2729             for (i=0;i<ins_local_primal_size;i++) {
2730               if (dnz[i] > ins_local_primal_size) {
2731                 dnz[i] = ins_local_primal_size;
2732               }
2733             }
2734             ierr = PetscFree(requests);CHKERRQ(ierr);
2735             ierr = PetscFree(aux_ins_indices);CHKERRQ(ierr);
2736             if (coarse_color == 0) { ierr = PetscFree(ranks_recv);CHKERRQ(ierr); }
2737           }
2738           /* create local to global mapping needed by coarse MATIS */
2739           if (coarse_comm != MPI_COMM_NULL ) {ierr = MPI_Comm_free(&coarse_comm);CHKERRQ(ierr);}
2740           coarse_comm = prec_comm;
2741           active_rank = rank_prec_comm;
2742           ierr = ISCreateGeneral(coarse_comm,ins_local_primal_size,ins_local_primal_indices,PETSC_COPY_VALUES,&coarse_IS);CHKERRQ(ierr);
2743           ierr = ISLocalToGlobalMappingCreateIS(coarse_IS,&coarse_ISLG);CHKERRQ(ierr);
2744           ierr = ISDestroy(&coarse_IS);CHKERRQ(ierr);
2745         } else if (pcbddc->coarse_problem_type==PARALLEL_BDDC) {
2746           /* arrays for values insertion */
2747           ins_local_primal_size = pcbddc->local_primal_size;
2748           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2749           ierr = PetscMalloc(ins_local_primal_size*ins_local_primal_size*sizeof(PetscScalar),&ins_coarse_mat_vals);CHKERRQ(ierr);
2750           for (j=0;j<ins_local_primal_size;j++){
2751             ins_local_primal_indices[j]=local_primal_indices[j];
2752             for (i=0;i<ins_local_primal_size;i++) {
2753               ins_coarse_mat_vals[j*ins_local_primal_size+i]=coarse_submat_vals[j*ins_local_primal_size+i];
2754             }
2755           }
2756         }
2757         break;
2758 
2759     }
2760 
2761   }
2762 
2763   /* Now create and fill up coarse matrix */
2764   if ( rank_prec_comm == active_rank ) {
2765 
2766     Mat matis_coarse_local_mat;
2767 
2768     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2769       ierr = MatCreate(coarse_comm,&coarse_mat);CHKERRQ(ierr);
2770       ierr = MatSetSizes(coarse_mat,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size);CHKERRQ(ierr);
2771       ierr = MatSetType(coarse_mat,coarse_mat_type);CHKERRQ(ierr);
2772       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
2773       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
2774       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
2775       ierr = MatSetOption(coarse_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2776       ierr = MatSetOption(coarse_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2777     } else {
2778       ierr = MatCreateIS(coarse_comm,1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_ISLG,&coarse_mat);CHKERRQ(ierr);
2779       ierr = MatSetUp(coarse_mat);CHKERRQ(ierr);
2780       ierr = MatISGetLocalMat(coarse_mat,&matis_coarse_local_mat);CHKERRQ(ierr);
2781       ierr = MatSetOptionsPrefix(coarse_mat,"coarse_");CHKERRQ(ierr);
2782       ierr = MatSetFromOptions(coarse_mat);CHKERRQ(ierr);
2783       ierr = MatSetUp(matis_coarse_local_mat);CHKERRQ(ierr);
2784       ierr = MatSetOption(matis_coarse_local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); /* local values stored in column major */
2785       ierr = MatSetOption(matis_coarse_local_mat,MAT_IGNORE_ZERO_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
2786     }
2787     /* preallocation */
2788     if (pcbddc->coarse_problem_type != MULTILEVEL_BDDC) {
2789 
2790       PetscInt lrows,lcols,bs;
2791 
2792       ierr = MatGetLocalSize(coarse_mat,&lrows,&lcols);CHKERRQ(ierr);
2793       ierr = MatPreallocateInitialize(coarse_comm,lrows,lcols,dnz,onz);CHKERRQ(ierr);
2794       ierr = MatGetBlockSize(coarse_mat,&bs);CHKERRQ(ierr);
2795 
2796       if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2797 
2798         Vec         vec_dnz,vec_onz;
2799         PetscScalar *my_dnz,*my_onz,*array;
2800         PetscInt    *mat_ranges,*row_ownership;
2801         PetscInt    coarse_index_row,coarse_index_col,owner;
2802 
2803         ierr = VecCreate(prec_comm,&vec_dnz);CHKERRQ(ierr);
2804         ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2805         ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,coarse_size);CHKERRQ(ierr);
2806         ierr = VecSetType(vec_dnz,VECMPI);CHKERRQ(ierr);
2807         ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2808 
2809         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_dnz);CHKERRQ(ierr);
2810         ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscScalar),&my_onz);CHKERRQ(ierr);
2811         ierr = PetscMemzero(my_dnz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
2812         ierr = PetscMemzero(my_onz,pcbddc->local_primal_size*sizeof(PetscScalar));CHKERRQ(ierr);
2813 
2814         ierr = PetscMalloc(coarse_size*sizeof(PetscInt),&row_ownership);CHKERRQ(ierr);
2815         ierr = MatGetOwnershipRanges(coarse_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2816         for (i=0;i<size_prec_comm;i++) {
2817           for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2818             row_ownership[j]=i;
2819           }
2820         }
2821 
2822         for (i=0;i<pcbddc->local_primal_size;i++) {
2823           coarse_index_row = local_primal_indices[i];
2824           owner = row_ownership[coarse_index_row];
2825           for (j=i;j<pcbddc->local_primal_size;j++) {
2826             owner = row_ownership[coarse_index_row];
2827             coarse_index_col = local_primal_indices[j];
2828             if (coarse_index_col > mat_ranges[owner]-1 && coarse_index_col < mat_ranges[owner+1] ) {
2829               my_dnz[i] += 1.0;
2830             } else {
2831               my_onz[i] += 1.0;
2832             }
2833             if (i != j) {
2834               owner = row_ownership[coarse_index_col];
2835               if (coarse_index_row > mat_ranges[owner]-1 && coarse_index_row < mat_ranges[owner+1] ) {
2836                 my_dnz[j] += 1.0;
2837               } else {
2838                 my_onz[j] += 1.0;
2839               }
2840             }
2841           }
2842         }
2843         ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2844         ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2845         if (pcbddc->local_primal_size) {
2846           ierr = VecSetValues(vec_dnz,pcbddc->local_primal_size,local_primal_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2847           ierr = VecSetValues(vec_onz,pcbddc->local_primal_size,local_primal_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2848         }
2849         ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2850         ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2851         ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2852         ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2853         j = mat_ranges[rank_prec_comm+1]-mat_ranges[rank_prec_comm];
2854         ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2855         for (i=0; i<j; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2856 
2857         ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2858         ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2859         for (i=0;i<j;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2860 
2861         ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2862         ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2863         ierr = PetscFree(my_onz);CHKERRQ(ierr);
2864         ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2865         ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2866         ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2867       }
2868 
2869       /* check */
2870       for (i=0;i<lrows;i++) {
2871         if (dnz[i]>lcols) dnz[i]=lcols;
2872         if (onz[i]>coarse_size-lcols) onz[i]=coarse_size-lcols;
2873       }
2874       ierr = MatSeqAIJSetPreallocation(coarse_mat,0,dnz);CHKERRQ(ierr);
2875       ierr = MatMPIAIJSetPreallocation(coarse_mat,0,dnz,0,onz);CHKERRQ(ierr);
2876       ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2877     } else {
2878       ierr = MatSeqAIJSetPreallocation(matis_coarse_local_mat,0,dnz);CHKERRQ(ierr);
2879       ierr = PetscFree(dnz);CHKERRQ(ierr);
2880     }
2881     /* insert values */
2882     if (pcbddc->coarse_problem_type == PARALLEL_BDDC) {
2883       ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
2884     } else if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2885       if (pcbddc->coarsening_ratio == 1) {
2886         ins_coarse_mat_vals = coarse_submat_vals;
2887         ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,INSERT_VALUES);CHKERRQ(ierr);
2888       } else {
2889         ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
2890         for (k=0;k<replicated_primal_size;k++) {
2891           offset = local_primal_displacements[k];
2892           offset2 = localdispl2[k];
2893           ins_local_primal_size = local_primal_displacements[k+1]-local_primal_displacements[k];
2894           ierr = PetscMalloc(ins_local_primal_size*sizeof(PetscInt),&ins_local_primal_indices);CHKERRQ(ierr);
2895           for (j=0;j<ins_local_primal_size;j++){
2896             ins_local_primal_indices[j]=(PetscInt)replicated_local_primal_indices[offset+j];
2897           }
2898           ins_coarse_mat_vals = &temp_coarse_mat_vals[offset2];
2899           ierr = MatSetValues(coarse_mat,ins_local_primal_size,ins_local_primal_indices,ins_local_primal_size,ins_local_primal_indices,ins_coarse_mat_vals,ADD_VALUES);CHKERRQ(ierr);
2900           ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr);
2901         }
2902       }
2903       ins_local_primal_indices = 0;
2904       ins_coarse_mat_vals = 0;
2905     }
2906     ierr = MatAssemblyBegin(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907     ierr = MatAssemblyEnd(coarse_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2908     /* symmetry of coarse matrix */
2909     if (issym) {
2910       ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2911     }
2912     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2913   }
2914 
2915   /* create loc to glob scatters if needed */
2916   if (pcbddc->coarse_communications_type == SCATTERS_BDDC) {
2917      IS local_IS,global_IS;
2918      ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size,0,1,&local_IS);CHKERRQ(ierr);
2919      ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&global_IS);CHKERRQ(ierr);
2920      ierr = VecScatterCreate(pcbddc->vec1_P,local_IS,pcbddc->coarse_vec,global_IS,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2921      ierr = ISDestroy(&local_IS);CHKERRQ(ierr);
2922      ierr = ISDestroy(&global_IS);CHKERRQ(ierr);
2923   }
2924 
2925   /* free memory no longer needed */
2926   if (coarse_ISLG)              { ierr = ISLocalToGlobalMappingDestroy(&coarse_ISLG);CHKERRQ(ierr); }
2927   if (ins_local_primal_indices) { ierr = PetscFree(ins_local_primal_indices);CHKERRQ(ierr); }
2928   if (ins_coarse_mat_vals)      { ierr = PetscFree(ins_coarse_mat_vals);CHKERRQ(ierr); }
2929   if (localsizes2)              { ierr = PetscFree(localsizes2);CHKERRQ(ierr); }
2930   if (localdispl2)              { ierr = PetscFree(localdispl2);CHKERRQ(ierr); }
2931   if (temp_coarse_mat_vals)     { ierr = PetscFree(temp_coarse_mat_vals);CHKERRQ(ierr); }
2932 
2933   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2934   ierr = PetscFree(local_primal_sizes);CHKERRQ(ierr);
2935   ierr = PetscFree(local_primal_displacements);CHKERRQ(ierr);
2936   ierr = PetscFree(replicated_local_primal_indices);CHKERRQ(ierr);
2937 
2938   /* Compute coarse null space */
2939   CoarseNullSpace = 0;
2940   if (pcbddc->NullSpace) {
2941     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2942   }
2943 
2944   /* KSP for coarse problem */
2945   if (rank_prec_comm == active_rank) {
2946     PetscBool isbddc=PETSC_FALSE;
2947 
2948     ierr = KSPCreate(coarse_comm,&pcbddc->coarse_ksp);CHKERRQ(ierr);
2949     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2950     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2951     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,max_it_coarse_ksp);CHKERRQ(ierr);
2952     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2953     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2954     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2955     /* Allow user's customization */
2956     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2957     /* Set Up PC for coarse problem BDDC */
2958     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
2959       i = pcbddc->current_level+1;
2960       ierr = PCBDDCSetLevel(pc_temp,i);CHKERRQ(ierr);
2961       ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2962       ierr = PCBDDCSetMaxLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2963       ierr = PCBDDCSetCoarseProblemType(pc_temp,MULTILEVEL_BDDC);CHKERRQ(ierr);
2964       if (CoarseNullSpace) {
2965         ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2966       }
2967       if (dbg_flag) {
2968         ierr = PetscViewerASCIIPrintf(viewer,"----------------Level %d: Setting up level %d---------------\n",pcbddc->current_level,i);CHKERRQ(ierr);
2969         ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2970       }
2971     } else {
2972       if (CoarseNullSpace) {
2973         ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2974       }
2975     }
2976     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2977     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2978 
2979     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&j);CHKERRQ(ierr);
2980     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2981     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2982     if (j == 1) {
2983       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2984       if (isbddc) {
2985         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2986       }
2987     }
2988   }
2989   /* Check coarse problem if requested */
2990   if ( dbg_flag && rank_prec_comm == active_rank ) {
2991     KSP check_ksp;
2992     PC  check_pc;
2993     Vec check_vec;
2994     PetscReal   abs_infty_error,infty_error,lambda_min,lambda_max;
2995     KSPType check_ksp_type;
2996 
2997     /* Create ksp object suitable for extreme eigenvalues' estimation */
2998     ierr = KSPCreate(coarse_comm,&check_ksp);CHKERRQ(ierr);
2999     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3000     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
3001     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3002       if (issym) check_ksp_type = KSPCG;
3003       else check_ksp_type = KSPGMRES;
3004       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
3005     } else {
3006       check_ksp_type = KSPPREONLY;
3007     }
3008     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3009     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3010     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3011     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3012     /* create random vec */
3013     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3014     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3015     if (CoarseNullSpace) {
3016       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3017     }
3018     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3019     /* solve coarse problem */
3020     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3021     if (CoarseNullSpace) {
3022       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3023     }
3024     /* check coarse problem residual error */
3025     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3026     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3027     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3028     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3029     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3030     /* get eigenvalue estimation if inexact */
3031     if (pcbddc->coarse_problem_type == MULTILEVEL_BDDC) {
3032       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3033       ierr = KSPGetIterationNumber(check_ksp,&k);CHKERRQ(ierr);
3034       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues estimated with %d iterations of %s.\n",k,check_ksp_type);CHKERRQ(ierr);
3035       ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem eigenvalues: % 1.14e %1.14e\n",lambda_min,lambda_max);CHKERRQ(ierr);
3036     }
3037     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
3038     ierr = PetscViewerASCIIPrintf(viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
3039     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3040   }
3041   if (dbg_flag) {
3042     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
3043   }
3044   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3045   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3046   PetscFunctionReturn(0);
3047 }
3048 
3049 #undef __FUNCT__
3050 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3051 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3052 {
3053   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3054   PC_IS*         pcis = (PC_IS*)pc->data;
3055   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3056   PetscInt       i,j,coarse_size;
3057   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3058   PetscErrorCode ierr;
3059 
3060   PetscFunctionBegin;
3061   /* get indices in local ordering for vertices and constraints */
3062   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3063   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3064   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3065   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3066   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3067   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3068   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3069 
3070   /* Compute global number of coarse dofs */
3071   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3072 
3073   /* check numbering */
3074   if (pcbddc->dbg_flag) {
3075     PetscScalar coarsesum,*array;
3076 
3077     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3078     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3079     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3080     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3081     for (i=0;i<pcbddc->local_primal_size;i++) {
3082       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3083     }
3084     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3085     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3086     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3087     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3088     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3089     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3090     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3091     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3092     for (i=0;i<pcis->n;i++) {
3093       if (array[i] == 1.0) {
3094         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3095       }
3096     }
3097     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3098     for (i=0;i<pcis->n;i++) {
3099       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3100     }
3101     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3102     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3103     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3104     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3105     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3106     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3107     if (pcbddc->dbg_flag > 1) {
3108       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3109       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3110       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3111       for (i=0;i<pcbddc->local_primal_size;i++) {
3112         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3113       }
3114       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3115     }
3116     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3117   }
3118   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3119   /* get back data */
3120   *coarse_size_n = coarse_size;
3121   *local_primal_indices_n = local_primal_indices;
3122   PetscFunctionReturn(0);
3123 }
3124 
3125