xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision f9eb5b7dd9df894bdc8b7c07630553a1ab9a0cd0)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
263     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
264     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
265     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
266     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
267   }
268 
269   /* Matrix of coarse basis functions (local) */
270   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
271   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
272   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
273   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
274   if (pcbddc->switch_static || pcbddc->dbg_flag) {
275     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
276     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
277     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
279   }
280 
281   if (pcbddc->dbg_flag) {
282     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
283     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
284     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
285   }
286   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
287   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
288 
289   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
290 
291   /* vertices */
292   for (i=0;i<n_vertices;i++) {
293     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
294     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
295     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
296     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
297     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
298     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
299     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
300     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
301     if (n_constraints) {
302       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
303       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
304       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
305     }
306     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
307     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
308 
309     /* Set values in coarse basis function and subdomain part of coarse_mat */
310     /* coarse basis functions */
311     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
312     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
313     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
314     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
315     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
317     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
318     if (pcbddc->switch_static || pcbddc->dbg_flag) {
319       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
320       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
322       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
323       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
324     }
325     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
326     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
327     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
329     if (n_constraints) {
330       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
331       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
332       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
333     }
334 
335     /* check */
336     if (pcbddc->dbg_flag) {
337       /* assemble subdomain vector on local nodes */
338       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
340       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
342       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
343       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
344       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
345       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
346       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350       if (n_constraints) {
351         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
353         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       }
355       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
357       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
358       /* check saddle point solution */
359       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
360       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
361       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
362       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
363       /* shift by the identity matrix */
364       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
365       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
366       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
367       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
368     }
369   }
370 
371   /* constraints */
372   for (i=0;i<n_constraints;i++) {
373     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
374     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
375     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
376     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
377     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
378     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
379     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
381     if (n_vertices) {
382       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
383     }
384     /* Set values in coarse basis function and subdomain part of coarse_mat */
385     /* coarse basis functions */
386     j = i+n_vertices; /* don't touch this! */
387     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
388     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
389     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
390     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
391     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
392     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
393     if (pcbddc->switch_static || pcbddc->dbg_flag) {
394       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
397       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
398       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
399     }
400     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
401     if (n_vertices) {
402       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
403       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
405     }
406     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
407     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
408     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
409 
410     if (pcbddc->dbg_flag) {
411       /* assemble subdomain vector on nodes */
412       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
413       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
414       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
416       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
417       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
418       /* assemble subdomain vector of lagrange multipliers */
419       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
420       if (n_vertices) {
421         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
422         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
423         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       }
425       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
430       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
431       /* check saddle point solution */
432       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
433       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
434       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
435       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
436       /* shift by the identity matrix */
437       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
441     }
442   }
443   /* call assembling routines for local coarse basis */
444   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
445   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
446   if (pcbddc->switch_static || pcbddc->dbg_flag) {
447     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
448     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
449   }
450 
451   /* compute other basis functions for non-symmetric problems */
452   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
453   if (!setsym || (setsym && !issym)) {
454     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
455     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
457     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
458     if (pcbddc->switch_static || pcbddc->dbg_flag) {
459       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
460       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
461       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
462       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
463     }
464     for (i=0;i<pcbddc->local_primal_size;i++) {
465       if (n_constraints) {
466         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
467         for (j=0;j<n_constraints;j++) {
468           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
469         }
470         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
471         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
472       }
473       if (i<n_vertices) {
474         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
475         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
477         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
478         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
479         if (n_constraints) {
480           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
481         }
482       } else {
483         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
484       }
485       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
486       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
487       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
490       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
491       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492       if (i<n_vertices) {
493         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
494       }
495       if (pcbddc->switch_static || pcbddc->dbg_flag) {
496         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
498         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
499         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
500         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
501       }
502 
503       if (pcbddc->dbg_flag) {
504         /* assemble subdomain vector on nodes */
505         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
509         if (i<n_vertices) {
510           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
511         }
512         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
513         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
514         /* assemble subdomain vector of lagrange multipliers */
515         for (j=0;j<pcbddc->local_primal_size;j++) {
516           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
517         }
518         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520         /* check saddle point solution */
521         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
522         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
523         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
524         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
525         /* shift by the identity matrix */
526         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
527         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
528         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
529         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
530       }
531     }
532     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534     if (pcbddc->switch_static || pcbddc->dbg_flag) {
535       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537     }
538   }
539   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
540   /* Checking coarse_sub_mat and coarse basis functios */
541   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
542   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
543   if (pcbddc->dbg_flag) {
544     Mat         coarse_sub_mat;
545     Mat         AUXMAT,TM1,TM2,TM3,TM4;
546     Mat         coarse_phi_D,coarse_phi_B;
547     Mat         coarse_psi_D,coarse_psi_B;
548     Mat         A_II,A_BB,A_IB,A_BI;
549     MatType     checkmattype=MATSEQAIJ;
550     PetscReal   real_value;
551 
552     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
553     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
554     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
555     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
556     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
557     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
558     if (pcbddc->coarse_psi_B) {
559       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
560       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
561     }
562     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
563 
564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
565     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
567     if (pcbddc->coarse_psi_B) {
568       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
569       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
570       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
571       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
572       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
573       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
574       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
575       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
576       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
577       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
578       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
579       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
580     } else {
581       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
583       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589     }
590     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
591     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
592     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
593     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
594     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
595     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
596     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
597     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
598     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
599     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
600     for (i=0;i<pcbddc->local_primal_size;i++) {
601       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
602     }
603     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
604     for (i=0;i<pcbddc->local_primal_size;i++) {
605       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
606     }
607     if (pcbddc->coarse_psi_B) {
608       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
609       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
610         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
611       }
612       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
613       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
614         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
615       }
616     }
617     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
618     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
619     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
620     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
621     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
622     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
623     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
624     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
625     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
626     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
627     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
628     if (pcbddc->coarse_psi_B) {
629       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
630       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
631     }
632     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
633     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
634     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
635     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
636   }
637   /* free memory */
638   if (n_vertices) {
639     ierr = PetscFree(vertices);CHKERRQ(ierr);
640     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
641     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
645   }
646   if (n_constraints) {
647     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
651   }
652   ierr = PetscFree(auxindices);CHKERRQ(ierr);
653   /* get back data */
654   *coarse_submat_vals_n = coarse_submat_vals;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
661 {
662   PC_IS*            pcis = (PC_IS*)(pc->data);
663   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
664   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
665   /* manage repeated solves */
666   MatReuse          reuse;
667   MatStructure      matstruct;
668   PetscErrorCode    ierr;
669 
670   PetscFunctionBegin;
671   /* get mat flags */
672   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
673   reuse = MAT_INITIAL_MATRIX;
674   if (pc->setupcalled) {
675     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
676     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
677     if (matstruct == SAME_NONZERO_PATTERN) {
678       reuse = MAT_REUSE_MATRIX;
679     } else {
680       reuse = MAT_INITIAL_MATRIX;
681     }
682   }
683   if (reuse == MAT_INITIAL_MATRIX) {
684     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
685     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
686     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
687     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
688     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
689   }
690 
691   /* transform local matrices if needed */
692   if (pcbddc->use_change_of_basis) {
693     Mat         change_mat_all;
694     PetscScalar *row_cmat_values;
695     PetscInt    *row_cmat_indices;
696     PetscInt    *nnz,*is_indices,*temp_indices;
697     PetscInt    i,j,k,n_D,n_B;
698 
699     /* Get Non-overlapping dimensions */
700     n_B = pcis->n_B;
701     n_D = pcis->n-n_B;
702 
703     /* compute nonzero structure of change of basis on all local nodes */
704     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
705     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
706     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
707     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
708     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
709     k=1;
710     for (i=0;i<n_B;i++) {
711       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
712       nnz[is_indices[i]]=j;
713       if (k < j) k = j;
714       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
715     }
716     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     /* assemble change of basis matrix on the whole set of local dofs */
718     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
719     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
720     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
721     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
722     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) {
725       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
726     }
727     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     for (i=0;i<n_B;i++) {
730       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
731       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
732       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
733       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
734     }
735     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
738     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
739     if (i==1) {
740       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
741     } else {
742       Mat work_mat;
743       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
744       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
745       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
746     }
747     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
748     ierr = PetscFree(nnz);CHKERRQ(ierr);
749     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
750   } else {
751     /* without change of basis, the local matrix is unchanged */
752     if (!pcbddc->local_mat) {
753       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
754       pcbddc->local_mat = matis->A;
755     }
756   }
757 
758   /* get submatrices */
759   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
760   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
761   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
762   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
769 {
770   PC_IS*         pcis = (PC_IS*)(pc->data);
771   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
772   IS             is_aux1,is_aux2;
773   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
774   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
775   PetscBT        bitmask;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* Set Non-overlapping dimensions */
780   n_B = pcis->n_B; n_D = pcis->n - n_B;
781   /* get vertex indices from constraint matrix */
782   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
783   /* Set number of constraints */
784   n_constraints = pcbddc->local_primal_size-n_vertices;
785   /* create auxiliary bitmask */
786   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
787   for (i=0;i<n_vertices;i++) {
788     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
789   }
790   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
791   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
792   for (i=0, n_R=0; i<pcis->n; i++) {
793     if (!PetscBTLookup(bitmask,i)) {
794       idx_R_local[n_R] = i;
795       n_R++;
796     }
797   }
798   ierr = PetscFree(vertices);CHKERRQ(ierr);
799   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
800 
801   /* print some info if requested */
802   if (pcbddc->dbg_flag) {
803     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
806     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
807     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
808     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
809     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
810   }
811 
812   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
813   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
814   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
815   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816   for (i=0; i<n_D; i++) {
817     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
818   }
819   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820   for (i=0, j=0; i<n_R; i++) {
821     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
822       aux_array1[j++] = i;
823     }
824   }
825   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
826   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
827   for (i=0, j=0; i<n_B; i++) {
828     if (!PetscBTLookup(bitmask,is_indices[i])) {
829       aux_array2[j++] = i;
830     }
831   }
832   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
834   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
835   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
836   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
837 
838   if (pcbddc->switch_static || pcbddc->dbg_flag) {
839     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
840     for (i=0, j=0; i<n_R; i++) {
841       if (PetscBTLookup(bitmask,idx_R_local[i])) {
842         aux_array1[j++] = i;
843       }
844     }
845     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
846     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
847     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   }
849   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
855 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
856 {
857   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
858 
859   PetscFunctionBegin;
860   pcbddc->use_exact_dirichlet=use;
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
866 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
867 {
868   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
869   PC_IS          *pcis = (PC_IS*)pc->data;
870   PC             pc_temp;
871   Mat            A_RR;
872   Vec            vec1,vec2,vec3;
873   MatStructure   matstruct;
874   PetscScalar    m_one = -1.0;
875   PetscReal      value;
876   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
877   PetscErrorCode ierr;
878 
879   PetscFunctionBegin;
880   /* Creating PC contexts for local Dirichlet and Neumann problems */
881   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
882 
883   /* DIRICHLET PROBLEM */
884   /* Matrix for Dirichlet problem is pcis->A_II */
885   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
886   if (!pcbddc->ksp_D) { /* create object if not yet build */
887     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
888     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
889     /* default */
890     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
891     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
892     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
893     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
894     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
895   }
896   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
897   /* Allow user's customization */
898   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
899   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
900   if (!n_D) {
901     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
902     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
903   }
904   /* Set Up KSP for Dirichlet problem of BDDC */
905   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
906   /* set ksp_D into pcis data */
907   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
908   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
909   pcis->ksp_D = pcbddc->ksp_D;
910 
911   /* NEUMANN PROBLEM */
912   /* Matrix for Neumann problem is A_RR -> we need to create it */
913   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
914   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
915   if (!pcbddc->ksp_R) { /* create object if not yet build */
916     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
917     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
918     /* default */
919     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
920     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
921     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
922     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
923     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
924   }
925   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
926   /* Allow user's customization */
927   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
928   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
929   if (!n_R) {
930     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
931     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
932   }
933   /* Set Up KSP for Neumann problem of BDDC */
934   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
935 
936   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
937 
938   /* Dirichlet */
939   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
940   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
941   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
942   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
943   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
944   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
945   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
946   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
947   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
948   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
949   /* need to be adapted? */
950   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
951   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
952   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
953   /* print info */
954   if (pcbddc->dbg_flag) {
955     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
956     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
957     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
960   }
961   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
962     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
963   }
964 
965   /* Neumann */
966   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
967   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
968   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
969   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
970   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
971   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
972   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
973   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
974   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
975   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
976   /* need to be adapted? */
977   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
978   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
979   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
980   /* print info */
981   if (pcbddc->dbg_flag) {
982     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
983     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
984   }
985   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
986     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
987   }
988 
989   /* free Neumann problem's matrix */
990   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
996 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
997 {
998   PetscErrorCode ierr;
999   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1000 
1001   PetscFunctionBegin;
1002   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1003   if (pcbddc->local_auxmat1) {
1004     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1005     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1012 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1013 {
1014   PetscErrorCode ierr;
1015   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1016   PC_IS*            pcis = (PC_IS*)  (pc->data);
1017   const PetscScalar zero = 0.0;
1018 
1019   PetscFunctionBegin;
1020   /* Application of PHI^T (or PSI^T)  */
1021   if (pcbddc->coarse_psi_B) {
1022     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1023     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1024   } else {
1025     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1026     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1027   }
1028   /* Scatter data of coarse_rhs */
1029   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1030   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031 
1032   /* Local solution on R nodes */
1033   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1034   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036   if (pcbddc->switch_static) {
1037     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1038     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1039   }
1040   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1041   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1042   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1044   if (pcbddc->switch_static) {
1045     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047   }
1048 
1049   /* Coarse solution */
1050   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1052     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1053   }
1054   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056 
1057   /* Sum contributions from two levels */
1058   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1059   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1065 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1066 {
1067   PetscErrorCode ierr;
1068   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1069 
1070   PetscFunctionBegin;
1071   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1077 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /* uncomment for testing purposes */
1088 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1089 #undef __FUNCT__
1090 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1091 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1092 {
1093   PetscErrorCode    ierr;
1094   PC_IS*            pcis = (PC_IS*)(pc->data);
1095   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1096   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1097   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1098   MatType           impMatType=MATSEQAIJ;
1099   /* one and zero */
1100   PetscScalar       one=1.0,zero=0.0;
1101   /* space to store constraints and their local indices */
1102   PetscScalar       *temp_quadrature_constraint;
1103   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1104   /* iterators */
1105   PetscInt          i,j,k,total_counts,temp_start_ptr;
1106   /* stuff to store connected components stored in pcbddc->mat_graph */
1107   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1108   PetscInt          n_ISForFaces,n_ISForEdges;
1109   /* near null space stuff */
1110   MatNullSpace      nearnullsp;
1111   const Vec         *nearnullvecs;
1112   Vec               *localnearnullsp;
1113   PetscBool         nnsp_has_cnst;
1114   PetscInt          nnsp_size;
1115   PetscScalar       *array;
1116   /* BLAS integers */
1117   PetscBLASInt      lwork,lierr;
1118   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1119   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1120   /* LAPACK working arrays for SVD or POD */
1121   PetscBool         skip_lapack;
1122   PetscScalar       *work;
1123   PetscReal         *singular_vals;
1124 #if defined(PETSC_USE_COMPLEX)
1125   PetscReal         *rwork;
1126 #endif
1127 #if defined(PETSC_MISSING_LAPACK_GESVD)
1128   PetscBLASInt      Blas_one_2=1;
1129   PetscScalar       *temp_basis,*correlation_mat;
1130 #else
1131   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1132   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1133 #endif
1134   /* change of basis */
1135   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1136   PetscBool         boolforchange;
1137   PetscBT           touched,change_basis;
1138   /* auxiliary stuff */
1139   PetscInt          *nnz,*is_indices,*local_to_B;
1140   /* some quantities */
1141   PetscInt          n_vertices,total_primal_vertices;
1142   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1143 
1144 
1145   PetscFunctionBegin;
1146   /* Get index sets for faces, edges and vertices from graph */
1147   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1148     pcbddc->use_vertices = PETSC_TRUE;
1149   }
1150   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1151   /* print some info */
1152   if (pcbddc->dbg_flag) {
1153     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1154     i = 0;
1155     if (ISForVertices) {
1156       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1157     }
1158     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1159     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1160     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1161     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1162   }
1163   /* check if near null space is attached to global mat */
1164   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1165   if (nearnullsp) {
1166     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1167   } else { /* if near null space is not provided BDDC uses constants by default */
1168     nnsp_size = 0;
1169     nnsp_has_cnst = PETSC_TRUE;
1170   }
1171   /* get max number of constraints on a single cc */
1172   max_constraints = nnsp_size;
1173   if (nnsp_has_cnst) max_constraints++;
1174 
1175   /*
1176        Evaluate maximum storage size needed by the procedure
1177        - temp_indices will contain start index of each constraint stored as follows
1178        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1179        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1180        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1181                                                                                                                                                          */
1182   total_counts = n_ISForFaces+n_ISForEdges;
1183   total_counts *= max_constraints;
1184   n_vertices = 0;
1185   if (ISForVertices) {
1186     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1187   }
1188   total_counts += n_vertices;
1189   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1190   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1191   total_counts = 0;
1192   max_size_of_constraint = 0;
1193   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1194     if (i<n_ISForEdges) {
1195       used_IS = &ISForEdges[i];
1196     } else {
1197       used_IS = &ISForFaces[i-n_ISForEdges];
1198     }
1199     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1200     total_counts += j;
1201     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1202   }
1203   total_counts *= max_constraints;
1204   total_counts += n_vertices;
1205   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1206   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1207   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1208   /* local to boundary numbering */
1209   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1210   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1211   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1212   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1213   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1214   /* get local part of global near null space vectors */
1215   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1216   for (k=0;k<nnsp_size;k++) {
1217     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220   }
1221 
1222   /* whether or not to skip lapack calls */
1223   skip_lapack = PETSC_TRUE;
1224   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1225 
1226   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1227   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1228     PetscScalar temp_work;
1229 #if defined(PETSC_MISSING_LAPACK_GESVD)
1230     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1231     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1232     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1233     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1234 #if defined(PETSC_USE_COMPLEX)
1235     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1236 #endif
1237     /* now we evaluate the optimal workspace using query with lwork=-1 */
1238     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1239     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1240     lwork = -1;
1241     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1242 #if !defined(PETSC_USE_COMPLEX)
1243     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1244 #else
1245     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1246 #endif
1247     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1248     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1249 #else /* on missing GESVD */
1250     /* SVD */
1251     PetscInt max_n,min_n;
1252     max_n = max_size_of_constraint;
1253     min_n = max_constraints;
1254     if (max_size_of_constraint < max_constraints) {
1255       min_n = max_size_of_constraint;
1256       max_n = max_constraints;
1257     }
1258     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1259 #if defined(PETSC_USE_COMPLEX)
1260     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1261 #endif
1262     /* now we evaluate the optimal workspace using query with lwork=-1 */
1263     lwork = -1;
1264     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1265     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1266     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1267     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1268 #if !defined(PETSC_USE_COMPLEX)
1269     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1270 #else
1271     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1272 #endif
1273     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1274     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1275 #endif /* on missing GESVD */
1276     /* Allocate optimal workspace */
1277     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1278     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1279   }
1280   /* Now we can loop on constraining sets */
1281   total_counts = 0;
1282   temp_indices[0] = 0;
1283   /* vertices */
1284   if (ISForVertices) {
1285     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1286     if (nnsp_has_cnst) { /* consider all vertices */
1287       for (i=0;i<n_vertices;i++) {
1288         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1289         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1290         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1291         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1292         total_counts++;
1293       }
1294     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1295       PetscBool used_vertex;
1296       for (i=0;i<n_vertices;i++) {
1297         used_vertex = PETSC_FALSE;
1298         k = 0;
1299         while (!used_vertex && k<nnsp_size) {
1300           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1301           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1302             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1303             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1304             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1305             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1306             total_counts++;
1307             used_vertex = PETSC_TRUE;
1308           }
1309           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1310           k++;
1311         }
1312       }
1313     }
1314     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315     n_vertices = total_counts;
1316   }
1317 
1318   /* edges and faces */
1319   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1320     if (i<n_ISForEdges) {
1321       used_IS = &ISForEdges[i];
1322       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1323     } else {
1324       used_IS = &ISForFaces[i-n_ISForEdges];
1325       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1326     }
1327     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1328     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1329     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1330     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1331     /* change of basis should not be performed on local periodic nodes */
1332     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1333     if (nnsp_has_cnst) {
1334       PetscScalar quad_value;
1335       temp_constraints++;
1336       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1337       for (j=0;j<size_of_constraint;j++) {
1338         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1339         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1340         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1341       }
1342       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1343       if (boolforchange) {
1344         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1345       }
1346       total_counts++;
1347     }
1348     for (k=0;k<nnsp_size;k++) {
1349       PetscReal real_value;
1350       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1351       for (j=0;j<size_of_constraint;j++) {
1352         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1353         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1354         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1355       }
1356       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1357       /* check if array is null on the connected component */
1358       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1359       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1360       if (real_value > 0.0) { /* keep indices and values */
1361         temp_constraints++;
1362         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1363         if (boolforchange) {
1364           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1365         }
1366         total_counts++;
1367       }
1368     }
1369     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1370     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1371     if (!pcbddc->use_nnsp_true) {
1372       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1373 
1374 #if defined(PETSC_MISSING_LAPACK_GESVD)
1375       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1376          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1377          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1378             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1379             from that computed using LAPACKgesvd
1380          -> This is due to a different computation of eigenvectors in LAPACKheev
1381          -> The quality of the POD-computed basis will be the same */
1382       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1383       /* Store upper triangular part of correlation matrix */
1384       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1385       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1386       for (j=0;j<temp_constraints;j++) {
1387         for (k=0;k<j+1;k++) {
1388           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1389         }
1390       }
1391       /* compute eigenvalues and eigenvectors of correlation matrix */
1392       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1393       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1394 #if !defined(PETSC_USE_COMPLEX)
1395       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1396 #else
1397       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1398 #endif
1399       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1400       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1401       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1402       j=0;
1403       while (j < temp_constraints && singular_vals[j] < tol) j++;
1404       total_counts=total_counts-j;
1405       /* scale and copy POD basis into used quadrature memory */
1406       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1407       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1408       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1409       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1410       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1411       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1412       if (j<temp_constraints) {
1413         PetscInt ii;
1414         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1415         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1416         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1417         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1418         for (k=0;k<temp_constraints-j;k++) {
1419           for (ii=0;ii<size_of_constraint;ii++) {
1420             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1421           }
1422         }
1423       }
1424 #else  /* on missing GESVD */
1425       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1426       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1427       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1428       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1429 #if !defined(PETSC_USE_COMPLEX)
1430       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1431 #else
1432       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1433 #endif
1434       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1435       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1436       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1437       k = temp_constraints;
1438       if (k > size_of_constraint) k = size_of_constraint;
1439       j = 0;
1440       while (j < k && singular_vals[k-j-1] < tol) j++;
1441       total_counts = total_counts-temp_constraints+k-j;
1442 #endif /* on missing GESVD */
1443     }
1444   }
1445   /* free index sets of faces, edges and vertices */
1446   for (i=0;i<n_ISForFaces;i++) {
1447     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1448   }
1449   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1450   for (i=0;i<n_ISForEdges;i++) {
1451     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1452   }
1453   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1454   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1455 
1456   /* free workspace */
1457   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1458     ierr = PetscFree(work);CHKERRQ(ierr);
1459 #if defined(PETSC_USE_COMPLEX)
1460     ierr = PetscFree(rwork);CHKERRQ(ierr);
1461 #endif
1462     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1463 #if defined(PETSC_MISSING_LAPACK_GESVD)
1464     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1465     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1466 #endif
1467   }
1468   for (k=0;k<nnsp_size;k++) {
1469     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1470   }
1471   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1472 
1473   /* set quantities in pcbddc data structure */
1474   /* n_vertices defines the number of subdomain corners in the primal space */
1475   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1476   pcbddc->local_primal_size = total_counts;
1477   pcbddc->n_vertices = n_vertices;
1478   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1479 
1480   /* Create constraint matrix */
1481   /* The constraint matrix is used to compute the l2g map of primal dofs */
1482   /* so we need to set it up properly either with or without change of basis */
1483   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1484   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1485   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1486   /* array to compute a local numbering of constraints : vertices first then constraints */
1487   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1488   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1489   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1490   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1491   /* auxiliary stuff for basis change */
1492   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1493   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1494 
1495   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1496   total_primal_vertices=0;
1497   for (i=0;i<pcbddc->local_primal_size;i++) {
1498     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1499     if (size_of_constraint == 1) {
1500       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1501       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1502       aux_primal_minloc[total_primal_vertices]=0;
1503       total_primal_vertices++;
1504     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1505       PetscInt min_loc,min_index;
1506       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1507       /* find first untouched local node */
1508       k = 0;
1509       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1510       min_index = global_indices[k];
1511       min_loc = k;
1512       /* search the minimum among global nodes already untouched on the cc */
1513       for (k=1;k<size_of_constraint;k++) {
1514         /* there can be more than one constraint on a single connected component */
1515         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1516           min_index = global_indices[k];
1517           min_loc = k;
1518         }
1519       }
1520       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1521       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1522       aux_primal_minloc[total_primal_vertices]=min_loc;
1523       total_primal_vertices++;
1524     }
1525   }
1526   /* free workspace */
1527   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1528   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1529   /* permute indices in order to have a sorted set of vertices */
1530   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1531 
1532   /* nonzero structure of constraint matrix */
1533   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1534   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1535   j=total_primal_vertices;
1536   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1537     if (!PetscBTLookup(change_basis,i)) {
1538       nnz[j]=temp_indices[i+1]-temp_indices[i];
1539       j++;
1540     }
1541   }
1542   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1543   ierr = PetscFree(nnz);CHKERRQ(ierr);
1544   /* set values in constraint matrix */
1545   for (i=0;i<total_primal_vertices;i++) {
1546     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1547   }
1548   total_counts = total_primal_vertices;
1549   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1550     if (!PetscBTLookup(change_basis,i)) {
1551       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1552       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1553       total_counts++;
1554     }
1555   }
1556   /* assembling */
1557   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1558   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1559   /*
1560   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1561   */
1562   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1563   if (pcbddc->use_change_of_basis) {
1564     PetscBool qr_needed = PETSC_FALSE;
1565     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1566     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1567     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1568     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1569     /* work arrays */
1570     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1571     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1572     /* nonzeros per row */
1573     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1574       if (PetscBTLookup(change_basis,i)) {
1575         qr_needed = PETSC_TRUE;
1576         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1577         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1578       }
1579     }
1580     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1581     ierr = PetscFree(nnz);CHKERRQ(ierr);
1582     /* Set initial identity in the matrix */
1583     for (i=0;i<pcis->n_B;i++) {
1584       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1585     }
1586 
1587     /* Now we loop on the constraints which need a change of basis */
1588     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1589     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1590     /* Change of basis matrix T computed via QR decomposition of constraints */
1591     if (qr_needed) {
1592       /* dual and primal dofs on a single cc */
1593       PetscInt     dual_dofs,primal_dofs;
1594       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1595       PetscInt     primal_counter;
1596       /* working stuff for GEQRF */
1597       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1598       PetscBLASInt lqr_work;
1599       /* working stuff for UNGQR */
1600       PetscScalar  *gqr_work,lgqr_work_t;
1601       PetscBLASInt lgqr_work;
1602       /* working stuff for TRTRS */
1603       PetscScalar  *trs_rhs;
1604       PetscBLASInt Blas_NRHS;
1605       /* pointers for values insertion into change of basis matrix */
1606       PetscInt     *start_rows,*start_cols;
1607       PetscScalar  *start_vals;
1608       /* working stuff for values insertion */
1609       PetscBT      is_primal;
1610 
1611       /* space to store Q */
1612       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1613       /* first we issue queries for optimal work */
1614       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1615       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1616       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1617       lqr_work = -1;
1618       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1619       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1620       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1621       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1622       lgqr_work = -1;
1623       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1624       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1625       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1626       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1627       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1628       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1629       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1630       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1631       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1632       /* array to store scaling factors for reflectors */
1633       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1634       /* array to store rhs and solution of triangular solver */
1635       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1636       /* array to store whether a node is primal or not */
1637       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1638       for (i=0;i<total_primal_vertices;i++) {
1639         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1640       }
1641 
1642       /* allocating workspace for check */
1643       if (pcbddc->dbg_flag) {
1644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1645         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1646         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1647       }
1648 
1649       /* loop on constraints and see whether or not they need a change of basis */
1650       /* -> using implicit ordering contained in temp_indices data */
1651       total_counts = pcbddc->n_vertices;
1652       primal_counter = total_counts;
1653       while (total_counts<pcbddc->local_primal_size) {
1654         primal_dofs = 1;
1655         if (PetscBTLookup(change_basis,total_counts)) {
1656           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1657           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1658             primal_dofs++;
1659           }
1660           /* get constraint info */
1661           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1662           dual_dofs = size_of_constraint-primal_dofs;
1663 
1664           /* copy quadrature constraints for change of basis check */
1665           if (pcbddc->dbg_flag) {
1666             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1667             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1668           }
1669 
1670           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1671           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1672 
1673           /* compute QR decomposition of constraints */
1674           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1675           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1676           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1677           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1678           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1679           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1680           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1681 
1682           /* explictly compute R^-T */
1683           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1684           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1685           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1686           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1687           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1688           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1689           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1690           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1691           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1692           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1693 
1694           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1695           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1697           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1699           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1700           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1701           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1702           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1703 
1704           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1705              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1706              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1709           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1710           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1711           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1712           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1713           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1714           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1715           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1716           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1717 
1718           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1719           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1720           /* insert cols for primal dofs */
1721           for (j=0;j<primal_dofs;j++) {
1722             start_vals = &qr_basis[j*size_of_constraint];
1723             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1724             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1725           }
1726           /* insert cols for dual dofs */
1727           for (j=0,k=0;j<dual_dofs;k++) {
1728             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1729               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1730               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1731               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1732               j++;
1733             }
1734           }
1735 
1736           /* check change of basis */
1737           if (pcbddc->dbg_flag) {
1738             PetscInt   ii,jj;
1739             PetscBool valid_qr=PETSC_TRUE;
1740             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1741             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1742             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1743             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1744             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1745             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1746             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1747             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1748             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1749             for (jj=0;jj<size_of_constraint;jj++) {
1750               for (ii=0;ii<primal_dofs;ii++) {
1751                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1752                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1753               }
1754             }
1755             if (!valid_qr) {
1756               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1757               for (jj=0;jj<size_of_constraint;jj++) {
1758                 for (ii=0;ii<primal_dofs;ii++) {
1759                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1760                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1761                   }
1762                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1763                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1764                   }
1765                 }
1766               }
1767             } else {
1768               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1769             }
1770           }
1771           /* increment primal counter */
1772           primal_counter += primal_dofs;
1773         } else {
1774           if (pcbddc->dbg_flag) {
1775             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1776           }
1777         }
1778         /* increment constraint counter total_counts */
1779         total_counts += primal_dofs;
1780       }
1781       if (pcbddc->dbg_flag) {
1782         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1783         ierr = PetscFree(work);CHKERRQ(ierr);
1784       }
1785       /* free workspace */
1786       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1787       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1788       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1789       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1790       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1791       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1792     }
1793     /* assembling */
1794     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     /*
1797     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1798     */
1799   }
1800   /* free workspace */
1801   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1802   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1803   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1804   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1805   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1806   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1807   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1808   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1814 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1815 {
1816   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1817   PC_IS       *pcis = (PC_IS*)pc->data;
1818   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1819   PetscInt    bs,ierr,i,vertex_size;
1820   PetscViewer viewer=pcbddc->dbg_viewer;
1821 
1822   PetscFunctionBegin;
1823   /* Init local Graph struct */
1824   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1825 
1826   /* Check validity of the csr graph passed in by the user */
1827   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1828     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1829   }
1830   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1831   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1832     Mat mat_adj;
1833     const PetscInt *xadj,*adjncy;
1834     PetscBool flg_row=PETSC_TRUE;
1835 
1836     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1837     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1838     if (!flg_row) {
1839       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1840     }
1841     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1842     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1843     if (!flg_row) {
1844       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1845     }
1846     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1847   }
1848 
1849   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1850   vertex_size = 1;
1851   if (!pcbddc->n_ISForDofs) {
1852     IS *custom_ISForDofs;
1853 
1854     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1855     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1856     for (i=0;i<bs;i++) {
1857       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1858     }
1859     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1860     /* remove my references to IS objects */
1861     for (i=0;i<bs;i++) {
1862       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1863     }
1864     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1865   } else { /* mat block size as vertex size (used for elasticity) */
1866     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1867   }
1868 
1869   /* Setup of Graph */
1870   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1871 
1872   /* Graph's connected components analysis */
1873   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1874 
1875   /* print some info to stdout */
1876   if (pcbddc->dbg_flag) {
1877     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1878   }
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1884 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1885 {
1886   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1887   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   n = 0;
1892   vertices = 0;
1893   if (pcbddc->ConstraintMatrix) {
1894     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1895     for (i=0;i<local_primal_size;i++) {
1896       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1897       if (size_of_constraint == 1) n++;
1898       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1899     }
1900     if (vertices_idx) {
1901       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1902       n = 0;
1903       for (i=0;i<local_primal_size;i++) {
1904         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1905         if (size_of_constraint == 1) {
1906           vertices[n++]=row_cmat_indices[0];
1907         }
1908         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1909       }
1910     }
1911   }
1912   *n_vertices = n;
1913   if (vertices_idx) *vertices_idx = vertices;
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1919 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1920 {
1921   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1922   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1923   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1924   PetscBT        touched;
1925   PetscErrorCode ierr;
1926 
1927     /* This function assumes that the number of local constraints per connected component
1928        is not greater than the number of nodes defined for the connected component
1929        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1930   PetscFunctionBegin;
1931   n = 0;
1932   constraints_index = 0;
1933   if (pcbddc->ConstraintMatrix) {
1934     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1935     max_size_of_constraint = 0;
1936     for (i=0;i<local_primal_size;i++) {
1937       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1938       if (size_of_constraint > 1) {
1939         n++;
1940       }
1941       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1942       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1943     }
1944     if (constraints_idx) {
1945       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1946       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1947       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1948       n = 0;
1949       for (i=0;i<local_primal_size;i++) {
1950         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1951         if (size_of_constraint > 1) {
1952           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1953           /* find first untouched local node */
1954           j = 0;
1955           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1956           min_index = row_cmat_global_indices[j];
1957           min_loc = j;
1958           /* search the minimum among nodes not yet touched on the connected component
1959              since there can be more than one constraint on a single cc */
1960           for (j=1;j<size_of_constraint;j++) {
1961             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1962               min_index = row_cmat_global_indices[j];
1963               min_loc = j;
1964             }
1965           }
1966           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1967           constraints_index[n++] = row_cmat_indices[min_loc];
1968         }
1969         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1970       }
1971       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1972       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1973     }
1974   }
1975   *n_constraints = n;
1976   if (constraints_idx) *constraints_idx = constraints_index;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /* the next two functions has been adapted from pcis.c */
1981 #undef __FUNCT__
1982 #define __FUNCT__ "PCBDDCApplySchur"
1983 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1984 {
1985   PetscErrorCode ierr;
1986   PC_IS          *pcis = (PC_IS*)(pc->data);
1987 
1988   PetscFunctionBegin;
1989   if (!vec2_B) { vec2_B = v; }
1990   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
1991   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
1992   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
1993   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
1994   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2000 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2001 {
2002   PetscErrorCode ierr;
2003   PC_IS          *pcis = (PC_IS*)(pc->data);
2004 
2005   PetscFunctionBegin;
2006   if (!vec2_B) { vec2_B = v; }
2007   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2008   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2009   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2010   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2011   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "PCBDDCSubsetNumbering"
2017 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2018 {
2019   Vec            local_vec,global_vec;
2020   IS             seqis,paris;
2021   VecScatter     scatter_ctx;
2022   PetscScalar    *array;
2023   PetscInt       *temp_global_dofs;
2024   PetscScalar    globalsum;
2025   PetscInt       i,j,s;
2026   PetscInt       nlocals,first_index,old_index,max_local;
2027   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2028   PetscMPIInt    *dof_sizes,*dof_displs;
2029   PetscBool      first_found;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   /* mpi buffers */
2034   MPI_Comm_size(comm,&size_prec_comm);
2035   MPI_Comm_rank(comm,&rank_prec_comm);
2036   j = ( !rank_prec_comm ? size_prec_comm : 0);
2037   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2038   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2039   /* get maximum size of subset */
2040   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2041   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2042   max_local = 0;
2043   if (n_local_dofs) {
2044     max_local = temp_global_dofs[0];
2045     for (i=1;i<n_local_dofs;i++) {
2046       if (max_local < temp_global_dofs[i] ) {
2047         max_local = temp_global_dofs[i];
2048       }
2049     }
2050   }
2051   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2052   max_global++;
2053   max_local = 0;
2054   if (n_local_dofs) {
2055     max_local = local_dofs[0];
2056     for (i=1;i<n_local_dofs;i++) {
2057       if (max_local < local_dofs[i] ) {
2058         max_local = local_dofs[i];
2059       }
2060     }
2061   }
2062   max_local++;
2063   /* allocate workspace */
2064   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2065   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2066   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2067   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2068   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2069   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2070   /* create scatter */
2071   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2072   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2073   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2074   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2075   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2076   /* init array */
2077   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2078   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2079   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2080   if (local_dofs_mult) {
2081     for (i=0;i<n_local_dofs;i++) {
2082       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2083     }
2084   } else {
2085     for (i=0;i<n_local_dofs;i++) {
2086       array[local_dofs[i]]=1.0;
2087     }
2088   }
2089   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2090   /* scatter into global vec and get total number of global dofs */
2091   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2092   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2093   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2094   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2095   /* Fill global_vec with cumulative function for global numbering */
2096   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2097   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2098   nlocals = 0;
2099   first_index = -1;
2100   first_found = PETSC_FALSE;
2101   for (i=0;i<s;i++) {
2102     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2103       first_found = PETSC_TRUE;
2104       first_index = i;
2105     }
2106     nlocals += (PetscInt)PetscRealPart(array[i]);
2107   }
2108   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2109   if (!rank_prec_comm) {
2110     dof_displs[0]=0;
2111     for (i=1;i<size_prec_comm;i++) {
2112       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2113     }
2114   }
2115   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2116   if (first_found) {
2117     array[first_index] += (PetscScalar)nlocals;
2118     old_index = first_index;
2119     for (i=first_index+1;i<s;i++) {
2120       if (PetscRealPart(array[i]) > 0.0) {
2121         array[i] += array[old_index];
2122         old_index = i;
2123       }
2124     }
2125   }
2126   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2127   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2128   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2129   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2130   /* get global ordering of local dofs */
2131   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2132   if (local_dofs_mult) {
2133     for (i=0;i<n_local_dofs;i++) {
2134       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2135     }
2136   } else {
2137     for (i=0;i<n_local_dofs;i++) {
2138       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2139     }
2140   }
2141   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2142   /* free workspace */
2143   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2144   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2145   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2146   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2147   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2148   /* return pointer to global ordering of local dofs */
2149   *global_numbering_subset = temp_global_dofs;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2155 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2156 {
2157   PetscInt       i,j;
2158   PetscScalar    *alphas;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   /* this implements stabilized Gram-Schmidt */
2163   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2164   for (i=0;i<n;i++) {
2165     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2166     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2167     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2168   }
2169   ierr = PetscFree(alphas);CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatConvert_IS_AIJ"
2176 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2177 {
2178   Mat new_mat;
2179   Mat_IS *matis = (Mat_IS*)(mat->data);
2180   /* info on mat */
2181   PetscInt bs,rows,cols;
2182   PetscInt lrows,lcols;
2183   PetscInt local_rows,local_cols;
2184   PetscMPIInt nsubdomains;
2185   /* preallocation */
2186   Vec vec_dnz,vec_onz;
2187   PetscScalar *my_dnz,*my_onz,*array;
2188   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2189   PetscInt  index_row,index_col,owner;
2190   PetscInt  *local_indices,*global_indices;
2191   /* work */
2192   PetscInt i,j;
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   /* CHECKS */
2197   /* get info from mat */
2198   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2199   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2200   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2201   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2202 
2203   /* MAT_INITIAL_MATRIX */
2204   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2205   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2206   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2207   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2208   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2209   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2210 
2211   /*
2212     preallocation
2213   */
2214 
2215   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2216   /*
2217      Some vectors are needed to sum up properly on shared interface dofs.
2218      Note that preallocation is not exact, since it overestimates nonzeros
2219   */
2220 /*
2221   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2222   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2223   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2224   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2225 */
2226   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2227   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2228   /* All processes need to compute entire row ownership */
2229   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2230   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2231   for (i=0;i<nsubdomains;i++) {
2232     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2233       row_ownership[j]=i;
2234     }
2235   }
2236   /* map indices of local mat to global values */
2237   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2238   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2239   for (i=0;i<local_rows;i++) local_indices[i]=i;
2240   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2241   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2242 
2243   /*
2244      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2245      then, they will be summed up properly. This way, preallocation is always sufficient
2246   */
2247   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2248   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2249   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2250   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2251   for (i=0;i<local_rows;i++) {
2252     index_row = global_indices[i];
2253     for (j=i;j<local_rows;j++) {
2254       owner = row_ownership[index_row];
2255       index_col = global_indices[j];
2256       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2257         my_dnz[i] += 1.0;
2258       } else { /* offdiag block */
2259         my_onz[i] += 1.0;
2260       }
2261       /* same as before, interchanging rows and cols */
2262       if (i != j) {
2263         owner = row_ownership[index_col];
2264         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2265           my_dnz[j] += 1.0;
2266         } else {
2267           my_onz[j] += 1.0;
2268         }
2269       }
2270     }
2271   }
2272   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2273   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2274   if (local_rows) { /* multilevel guard */
2275     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2276     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2277   }
2278   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2279   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2280   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2281   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2282   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2283   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2284   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2285 
2286   /* set computed preallocation in dnz and onz */
2287   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2288   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2289   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2290   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2291   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2292   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2293   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2294   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2295 
2296   /* Resize preallocation if overestimated */
2297   for (i=0;i<lrows;i++) {
2298     dnz[i] = PetscMin(dnz[i],lcols);
2299     onz[i] = PetscMin(onz[i],cols-lcols);
2300   }
2301   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2302   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2303 
2304   /*
2305     Set values. Very Basic.
2306   */
2307   for (i=0;i<local_rows;i++) {
2308     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2309     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2310     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2311     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2312     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2313   }
2314   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2315   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317 
2318   /* get back mat */
2319   *M = new_mat;
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatISSubassemble_Private"
2325 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2326 {
2327   Mat             subdomain_adj;
2328   IS              new_ranks,ranks_send_to;
2329   MatPartitioning partitioner;
2330   Mat_IS          *matis;
2331   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2332   PetscInt        prank;
2333   PetscMPIInt     size,rank,color;
2334   PetscInt        *xadj,*adjncy,*oldranks;
2335   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2336   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2337   PetscErrorCode  ierr;
2338   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2339   PetscSubcomm    subcomm;
2340 
2341   PetscFunctionBegin;
2342   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2343   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2344   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2345 
2346   /* Get info on mapping */
2347   matis = (Mat_IS*)(mat->data);
2348   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2349   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2350 
2351   /* build local CSR graph of subdomains' connectivity */
2352   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2353   xadj[0] = 0;
2354   xadj[1] = PetscMax(n_neighs-1,0);
2355   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2356   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2357 
2358   if (threshold) {
2359     PetscInt* count,min_threshold;
2360     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2361     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2362     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2363       for (j=0;j<n_shared[i];j++) {
2364         count[shared[i][j]] += 1;
2365       }
2366     }
2367     /* adapt threshold since we dont want to lose significant connections */
2368     min_threshold = n_neighs;
2369     for (i=1;i<n_neighs;i++) {
2370       for (j=0;j<n_shared[i];j++) {
2371         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2372       }
2373     }
2374     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2375     threshold = PetscMax(min_threshold+1,threshold);
2376     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2377     xadj[1] = 0;
2378     for (i=1;i<n_neighs;i++) {
2379       for (j=0;j<n_shared[i];j++) {
2380         if (count[shared[i][j]] < threshold) {
2381           adjncy[xadj[1]] = neighs[i];
2382           adjncy_wgt[xadj[1]] = n_shared[i];
2383           xadj[1]++;
2384           break;
2385         }
2386       }
2387     }
2388     ierr = PetscFree(count);CHKERRQ(ierr);
2389   } else {
2390     if (xadj[1]) {
2391       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2392       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2393     }
2394   }
2395   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2396   if (use_square) {
2397     for (i=0;i<xadj[1];i++) {
2398       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2399     }
2400   }
2401   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2402 
2403   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2404 
2405   /*
2406     Restrict work on active processes only.
2407   */
2408   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2409   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2410   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2411   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2412   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2413   if (color) {
2414     ierr = PetscFree(xadj);CHKERRQ(ierr);
2415     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2416     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2417   } else {
2418     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2419     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2420     prank = rank;
2421     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2422     for (i=0;i<size;i++) {
2423       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2424     }
2425     for (i=0;i<xadj[1];i++) {
2426       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2427     }
2428     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2429     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2430     n_subdomains = (PetscInt)size;
2431     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2432 
2433     /* Partition */
2434     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2435     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2436     if (use_vwgt) {
2437       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2438       v_wgt[0] = local_size;
2439       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2440     }
2441     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2442     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2443     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2444     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2445     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2446 
2447     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2448     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2449     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2450     /* clean up */
2451     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2452     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2453     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2454     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2455   }
2456   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2457 
2458   /* assemble parallel IS for sends */
2459   i = 1;
2460   if (color) i=0;
2461   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2462   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2463 
2464   /* get back IS */
2465   *is_sends = ranks_send_to;
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatISSubassemble"
2473 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2474 {
2475   Mat                    local_mat,new_mat;
2476   Mat_IS                 *matis;
2477   IS                     is_sends_internal;
2478   PetscInt               rows,cols;
2479   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2480   PetscBool              ismatis,isdense;
2481   ISLocalToGlobalMapping l2gmap;
2482   PetscInt*              l2gmap_indices;
2483   const PetscInt*        is_indices;
2484   MatType                new_local_type;
2485   MatTypePrivate         new_local_type_private;
2486   /* buffers */
2487   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2488   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2489   /* MPI */
2490   MPI_Comm               comm;
2491   PetscMPIInt            n_sends,n_recvs,commsize;
2492   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2493   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2494   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2495   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2496   PetscErrorCode         ierr;
2497 
2498   PetscFunctionBegin;
2499   /* checks */
2500   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2501   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2502   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2503   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2504   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2505   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2506   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2507   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2508   PetscValidLogicalCollectiveInt(mat,bs,0);
2509   /* prepare IS for sending if not provided */
2510   if (!is_sends) {
2511     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2512     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2513   } else {
2514     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2515     is_sends_internal = is_sends;
2516   }
2517 
2518   /* get pointer of MATIS data */
2519   matis = (Mat_IS*)mat->data;
2520 
2521   /* get comm */
2522   comm = PetscObjectComm((PetscObject)mat);
2523 
2524   /* compute number of sends */
2525   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2526   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2527 
2528   /* compute number of receives */
2529   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2530   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2531   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2532   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2533   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2534   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2535   ierr = PetscFree(iflags);CHKERRQ(ierr);
2536 
2537   /* prepare send/receive buffers */
2538   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2539   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2540   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2541   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2542 
2543   /* Get data from local mat */
2544   if (!isdense) {
2545     /* TODO: See below some guidelines on how to prepare the local buffers */
2546     /*
2547        send_buffer_vals should contain the raw values of the local matrix
2548        send_buffer_idxs should contain:
2549        - MatType_PRIVATE type
2550        - PetscInt        size_of_l2gmap
2551        - PetscInt        global_row_indices[size_of_l2gmap]
2552        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2553     */
2554   } else {
2555     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2556     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2557     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2558     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2559     send_buffer_idxs[1] = i;
2560     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2561     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2562     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2563     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2564     for (i=0;i<n_sends;i++) {
2565       ilengths_vals[is_indices[i]] = len*len;
2566       ilengths_idxs[is_indices[i]] = len+2;
2567     }
2568   }
2569   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2570   buf_size_idxs = 0;
2571   buf_size_vals = 0;
2572   for (i=0;i<n_recvs;i++) {
2573     buf_size_idxs += (PetscInt)olengths_idxs[i];
2574     buf_size_vals += (PetscInt)olengths_vals[i];
2575   }
2576   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2577   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2578 
2579   /* get new tags for clean communications */
2580   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2581   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2582 
2583   /* allocate for requests */
2584   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2585   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2586   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2587   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2588 
2589   /* communications */
2590   ptr_idxs = recv_buffer_idxs;
2591   ptr_vals = recv_buffer_vals;
2592   for (i=0;i<n_recvs;i++) {
2593     source_dest = onodes[i];
2594     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2595     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2596     ptr_idxs += olengths_idxs[i];
2597     ptr_vals += olengths_vals[i];
2598   }
2599   for (i=0;i<n_sends;i++) {
2600     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2601     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2602     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2603   }
2604   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2605   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2606 
2607   /* assemble new l2g map */
2608   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2609   ptr_idxs = recv_buffer_idxs;
2610   buf_size_idxs = 0;
2611   for (i=0;i<n_recvs;i++) {
2612     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2613     ptr_idxs += olengths_idxs[i];
2614   }
2615   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2616   ptr_idxs = recv_buffer_idxs;
2617   buf_size_idxs = 0;
2618   for (i=0;i<n_recvs;i++) {
2619     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2620     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2621     ptr_idxs += olengths_idxs[i];
2622   }
2623   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2624   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2625   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2626 
2627   /* infer new local matrix type from received local matrices type */
2628   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2629   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2630   new_local_type_private = MATAIJ_PRIVATE;
2631   new_local_type = MATSEQAIJ;
2632   if (n_recvs) {
2633     new_local_type_private = send_buffer_idxs[0];
2634     ptr_idxs = recv_buffer_idxs;
2635     for (i=0;i<n_recvs;i++) {
2636       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2637         new_local_type_private = MATAIJ_PRIVATE;
2638         break;
2639       }
2640       ptr_idxs += olengths_idxs[i];
2641     }
2642     switch (new_local_type_private) {
2643       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2644         new_local_type = MATSEQAIJ;
2645         bs = 1;
2646         break;
2647       case MATAIJ_PRIVATE:
2648         new_local_type = MATSEQAIJ;
2649         bs = 1;
2650         break;
2651       case MATBAIJ_PRIVATE:
2652         new_local_type = MATSEQBAIJ;
2653         break;
2654       case MATSBAIJ_PRIVATE:
2655         new_local_type = MATSEQSBAIJ;
2656         break;
2657       default:
2658         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2659         break;
2660     }
2661   }
2662 
2663   /* create MATIS object */
2664   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2665   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2666   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2667   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2668   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2669   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2670 
2671   /* set values */
2672   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2673   ptr_vals = recv_buffer_vals;
2674   ptr_idxs = recv_buffer_idxs;
2675   for (i=0;i<n_recvs;i++) {
2676     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2677       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2678       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2679       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2680       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2681       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2682     }
2683     ptr_idxs += olengths_idxs[i];
2684     ptr_vals += olengths_vals[i];
2685   }
2686   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2687   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2688   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2689   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690 
2691   { /* check */
2692     Vec       lvec,rvec;
2693     PetscReal infty_error;
2694 
2695     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2696     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2697     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2698     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2699     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2700     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2701     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2702     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2703     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2704   }
2705 
2706   /* free workspace */
2707   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2708   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2709   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2710   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2711   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2712   if (isdense) {
2713     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2714     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2715   } else {
2716     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2717   }
2718   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2719   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2720   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2721   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2722   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2723   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2724   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2725   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2726   ierr = PetscFree(onodes);CHKERRQ(ierr);
2727   /* get back new mat */
2728   *mat_n = new_mat;
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2734 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2735 {
2736   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2737   PC_IS                  *pcis = (PC_IS*)pc->data;
2738   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2739   MatNullSpace           CoarseNullSpace=NULL;
2740   ISLocalToGlobalMapping coarse_islg;
2741   IS                     coarse_is;
2742   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2743   PetscInt               im_active=-1,active_procs=-1;
2744   PC                     pc_temp;
2745   PCType                 coarse_pc_type;
2746   KSPType                coarse_ksp_type;
2747   PetscBool              multilevel_requested,multilevel_allowed;
2748   PetscBool              setsym,issym,isbddc,isnn;
2749   MatStructure           matstruct;
2750   PetscErrorCode         ierr;
2751 
2752   PetscFunctionBegin;
2753   /* Assign global numbering to coarse dofs */
2754   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2755 
2756   /* infer some info from user */
2757   issym = PETSC_FALSE;
2758   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2759   multilevel_allowed = PETSC_FALSE;
2760   multilevel_requested = PETSC_FALSE;
2761   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2762   if (multilevel_requested) {
2763     /* count "active processes" */
2764     im_active = !!(pcis->n);
2765     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2766     if (active_procs/pcbddc->coarsening_ratio < 2) {
2767       multilevel_allowed = PETSC_FALSE;
2768     } else {
2769       multilevel_allowed = PETSC_TRUE;
2770     }
2771   }
2772 
2773   /* set defaults for coarse KSP and PC */
2774   if (multilevel_allowed) {
2775     if (issym) {
2776       coarse_ksp_type = KSPRICHARDSON;
2777     } else {
2778       coarse_ksp_type = KSPCHEBYSHEV;
2779     }
2780     coarse_pc_type = PCBDDC;
2781   } else {
2782     coarse_ksp_type = KSPPREONLY;
2783     coarse_pc_type = PCREDUNDANT;
2784   }
2785 
2786   /* create the coarse KSP object only once with defaults */
2787   if (!pcbddc->coarse_ksp) {
2788     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2789     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2790     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2791     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2792     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2793     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2794     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2795   }
2796   /* allow user customization */
2797   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2798 
2799   /* get some info after set from options */
2800   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2801   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2802   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2803   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2804     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2805     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2806     isbddc = PETSC_FALSE;
2807   }
2808 
2809   /* propagate BDDC info to the next level */
2810   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2811   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2812   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2813 
2814   /* Compute coarse null space (special handling by BDDC only) */
2815   if (pcbddc->NullSpace) {
2816     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2817     if (isbddc) {
2818       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2819     } else {
2820       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2821     }
2822   }
2823 
2824   /* creates temporary MATIS object for coarse matrix */
2825   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2826   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2827   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2828   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2829   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2830   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2831   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2832   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2833   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2834   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2835 
2836   /* assemble coarse matrix */
2837   if (isbddc || isnn) {
2838     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2839   } else {
2840     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2841   }
2842   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2843 
2844   /* create local to global scatters for coarse problem */
2845   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2846   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2847   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2848 
2849   /* propagate symmetry info to coarse matrix */
2850   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2851 
2852   /* set operators */
2853   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2854   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2855 
2856   /* additional KSP customization */
2857   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2858   if (max_it < 5) {
2859     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2860   }
2861   //ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr);
2862 
2863 
2864   /* print some info if requested */
2865   if (pcbddc->dbg_flag) {
2866     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
2867     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
2868     if (!multilevel_allowed) {
2869       if (multilevel_requested) {
2870         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2871       } else if (pcbddc->max_levels) {
2872         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
2873       }
2874     }
2875     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level);CHKERRQ(ierr);
2876     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2877   }
2878 
2879   /* setup coarse ksp */
2880   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2881   if (pcbddc->dbg_flag) {
2882     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
2883     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2884     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
2885     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2886   }
2887 
2888   /* HACK: TODO see if it could be avoided */
2889   if (isbddc) {
2890     ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2891   }
2892 
2893   /* Check coarse problem if requested */
2894   if (pcbddc->dbg_flag) {
2895     KSP       check_ksp;
2896     KSPType   check_ksp_type;
2897     PC        check_pc;
2898     Vec       check_vec;
2899     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
2900     PetscInt  its;
2901     PetscBool ispreonly,compute;
2902 
2903     /* Create ksp object suitable for estimation of extreme eigenvalues */
2904     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
2905     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2906     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2907     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
2908     if (ispreonly) {
2909       check_ksp_type = KSPPREONLY;
2910       compute = PETSC_FALSE;
2911     } else {
2912       if (issym) check_ksp_type = KSPCG;
2913       else check_ksp_type = KSPGMRES;
2914       compute = PETSC_TRUE;
2915     }
2916     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2917     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
2918     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2919     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2920     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2921     /* create random vec */
2922     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2923     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2924     if (CoarseNullSpace) {
2925       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2926     }
2927     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2928     /* solve coarse problem */
2929     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2930     if (CoarseNullSpace) {
2931       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2932     }
2933     /* check coarse problem residual error */
2934     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2935     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2936     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2937     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2938     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2939     /* get eigenvalue estimation if preonly has not been requested */
2940     if (!ispreonly) {
2941       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
2942       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
2943       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
2944     }
2945     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
2946     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
2947     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2948     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
2949   }
2950   /* free memory */
2951   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
2952   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
2958 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
2959 {
2960   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2961   PC_IS*         pcis = (PC_IS*)pc->data;
2962   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
2963   PetscInt       i,j,coarse_size;
2964   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   /* get indices in local ordering for vertices and constraints */
2969   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2970   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2971   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2972   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2973   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2974   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2975   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2976 
2977   /* Compute global number of coarse dofs */
2978   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2979 
2980   /* check numbering */
2981   if (pcbddc->dbg_flag) {
2982     PetscScalar coarsesum,*array;
2983 
2984     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2985     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2986     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
2987     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2988     for (i=0;i<pcbddc->local_primal_size;i++) {
2989       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2990     }
2991     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
2992     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
2993     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2994     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2995     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2996     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2997     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2998     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2999     for (i=0;i<pcis->n;i++) {
3000       if (array[i] == 1.0) {
3001         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3002       }
3003     }
3004     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3005     for (i=0;i<pcis->n;i++) {
3006       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3007     }
3008     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3009     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3010     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3011     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3012     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3013     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3014     if (pcbddc->dbg_flag > 1) {
3015       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3016       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3017       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3018       for (i=0;i<pcbddc->local_primal_size;i++) {
3019         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3020       }
3021       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3022     }
3023     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3024   }
3025   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3026   /* get back data */
3027   *coarse_size_n = coarse_size;
3028   *local_primal_indices_n = local_primal_indices;
3029   PetscFunctionReturn(0);
3030 }
3031 
3032