xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 2b5107596076489fc208cec5c8bcff90c31b1d16)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
263     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
264     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
265     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
266     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
267   }
268 
269   /* Matrix of coarse basis functions (local) */
270   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
271   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
272   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
273   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
274   if (pcbddc->switch_static || pcbddc->dbg_flag) {
275     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
276     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
277     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
279   }
280 
281   if (pcbddc->dbg_flag) {
282     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
283     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
284     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
285   }
286   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
287   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
288 
289   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
290 
291   /* vertices */
292   for (i=0;i<n_vertices;i++) {
293     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
294     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
295     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
296     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
297     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
298     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
299     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
300     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
301     if (n_constraints) {
302       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
303       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
304       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
305     }
306     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
307     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
308 
309     /* Set values in coarse basis function and subdomain part of coarse_mat */
310     /* coarse basis functions */
311     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
312     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
313     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
314     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
315     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
317     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
318     if (pcbddc->switch_static || pcbddc->dbg_flag) {
319       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
320       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
322       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
323       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
324     }
325     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
326     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
327     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
329     if (n_constraints) {
330       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
331       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
332       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
333     }
334 
335     /* check */
336     if (pcbddc->dbg_flag) {
337       /* assemble subdomain vector on local nodes */
338       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
340       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
342       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
343       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
344       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
345       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
346       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350       if (n_constraints) {
351         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
353         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       }
355       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
357       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
358       /* check saddle point solution */
359       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
360       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
361       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
362       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
363       /* shift by the identity matrix */
364       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
365       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
366       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
367       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
368     }
369   }
370 
371   /* constraints */
372   for (i=0;i<n_constraints;i++) {
373     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
374     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
375     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
376     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
377     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
378     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
379     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
381     if (n_vertices) {
382       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
383     }
384     /* Set values in coarse basis function and subdomain part of coarse_mat */
385     /* coarse basis functions */
386     j = i+n_vertices; /* don't touch this! */
387     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
388     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
389     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
390     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
391     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
392     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
393     if (pcbddc->switch_static || pcbddc->dbg_flag) {
394       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
397       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
398       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
399     }
400     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
401     if (n_vertices) {
402       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
403       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
405     }
406     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
407     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
408     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
409 
410     if (pcbddc->dbg_flag) {
411       /* assemble subdomain vector on nodes */
412       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
413       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
414       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
416       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
417       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
418       /* assemble subdomain vector of lagrange multipliers */
419       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
420       if (n_vertices) {
421         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
422         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
423         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       }
425       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
430       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
431       /* check saddle point solution */
432       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
433       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
434       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
435       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
436       /* shift by the identity matrix */
437       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
441     }
442   }
443   /* call assembling routines for local coarse basis */
444   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
445   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
446   if (pcbddc->switch_static || pcbddc->dbg_flag) {
447     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
448     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
449   }
450 
451   /* compute other basis functions for non-symmetric problems */
452   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
453   if (!setsym || (setsym && !issym)) {
454     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
455     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
457     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
458     if (pcbddc->switch_static || pcbddc->dbg_flag) {
459       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
460       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
461       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
462       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
463     }
464     for (i=0;i<pcbddc->local_primal_size;i++) {
465       if (n_constraints) {
466         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
467         for (j=0;j<n_constraints;j++) {
468           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
469         }
470         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
471         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
472       }
473       if (i<n_vertices) {
474         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
475         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
477         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
478         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
479         if (n_constraints) {
480           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
481         }
482       } else {
483         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
484       }
485       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
486       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
487       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
490       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
491       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492       if (i<n_vertices) {
493         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
494       }
495       if (pcbddc->switch_static || pcbddc->dbg_flag) {
496         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
498         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
499         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
500         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
501       }
502 
503       if (pcbddc->dbg_flag) {
504         /* assemble subdomain vector on nodes */
505         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
509         if (i<n_vertices) {
510           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
511         }
512         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
513         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
514         /* assemble subdomain vector of lagrange multipliers */
515         for (j=0;j<pcbddc->local_primal_size;j++) {
516           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
517         }
518         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520         /* check saddle point solution */
521         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
522         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
523         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
524         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
525         /* shift by the identity matrix */
526         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
527         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
528         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
529         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
530       }
531     }
532     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534     if (pcbddc->switch_static || pcbddc->dbg_flag) {
535       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537     }
538   }
539   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
540   /* Checking coarse_sub_mat and coarse basis functios */
541   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
542   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
543   if (pcbddc->dbg_flag) {
544     Mat         coarse_sub_mat;
545     Mat         AUXMAT,TM1,TM2,TM3,TM4;
546     Mat         coarse_phi_D,coarse_phi_B;
547     Mat         coarse_psi_D,coarse_psi_B;
548     Mat         A_II,A_BB,A_IB,A_BI;
549     MatType     checkmattype=MATSEQAIJ;
550     PetscReal   real_value;
551 
552     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
553     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
554     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
555     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
556     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
557     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
558     if (pcbddc->coarse_psi_B) {
559       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
560       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
561     }
562     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
563 
564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
565     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
567     if (pcbddc->coarse_psi_B) {
568       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
569       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
570       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
571       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
572       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
573       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
574       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
575       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
576       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
577       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
578       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
579       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
580     } else {
581       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
583       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589     }
590     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
591     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
592     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
593     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
594     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
595     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
596     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
597     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
598     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
599     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
600     for (i=0;i<pcbddc->local_primal_size;i++) {
601       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
602     }
603     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
604     for (i=0;i<pcbddc->local_primal_size;i++) {
605       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
606     }
607     if (pcbddc->coarse_psi_B) {
608       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
609       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
610         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
611       }
612       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
613       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
614         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
615       }
616     }
617     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
618     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
619     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
620     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
621     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
622     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
623     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
624     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
625     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
626     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
627     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
628     if (pcbddc->coarse_psi_B) {
629       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
630       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
631     }
632     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
633     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
634     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
635     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
636   }
637   /* free memory */
638   if (n_vertices) {
639     ierr = PetscFree(vertices);CHKERRQ(ierr);
640     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
641     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
645   }
646   if (n_constraints) {
647     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
651   }
652   ierr = PetscFree(auxindices);CHKERRQ(ierr);
653   /* get back data */
654   *coarse_submat_vals_n = coarse_submat_vals;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
661 {
662   PC_IS*            pcis = (PC_IS*)(pc->data);
663   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
664   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
665   /* manage repeated solves */
666   MatReuse          reuse;
667   MatStructure      matstruct;
668   PetscErrorCode    ierr;
669 
670   PetscFunctionBegin;
671   /* get mat flags */
672   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
673   reuse = MAT_INITIAL_MATRIX;
674   if (pc->setupcalled) {
675     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
676     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
677     if (matstruct == SAME_NONZERO_PATTERN) {
678       reuse = MAT_REUSE_MATRIX;
679     } else {
680       reuse = MAT_INITIAL_MATRIX;
681     }
682   }
683   if (reuse == MAT_INITIAL_MATRIX) {
684     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
685     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
686     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
687     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
688     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
689   }
690 
691   /* transform local matrices if needed */
692   if (pcbddc->use_change_of_basis) {
693     Mat         change_mat_all;
694     PetscScalar *row_cmat_values;
695     PetscInt    *row_cmat_indices;
696     PetscInt    *nnz,*is_indices,*temp_indices;
697     PetscInt    i,j,k,n_D,n_B;
698 
699     /* Get Non-overlapping dimensions */
700     n_B = pcis->n_B;
701     n_D = pcis->n-n_B;
702 
703     /* compute nonzero structure of change of basis on all local nodes */
704     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
705     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
706     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
707     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
708     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
709     k=1;
710     for (i=0;i<n_B;i++) {
711       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
712       nnz[is_indices[i]]=j;
713       if (k < j) k = j;
714       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
715     }
716     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     /* assemble change of basis matrix on the whole set of local dofs */
718     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
719     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
720     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
721     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
722     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) {
725       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
726     }
727     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     for (i=0;i<n_B;i++) {
730       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
731       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
732       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
733       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
734     }
735     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
738     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
739     if (i==1) {
740       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
741     } else {
742       Mat work_mat;
743       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
744       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
745       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
746     }
747     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
748     ierr = PetscFree(nnz);CHKERRQ(ierr);
749     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
750   } else {
751     /* without change of basis, the local matrix is unchanged */
752     if (!pcbddc->local_mat) {
753       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
754       pcbddc->local_mat = matis->A;
755     }
756   }
757 
758   /* get submatrices */
759   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
760   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
761   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
762   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
769 {
770   PC_IS*         pcis = (PC_IS*)(pc->data);
771   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
772   IS             is_aux1,is_aux2;
773   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
774   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
775   PetscBT        bitmask;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* Set Non-overlapping dimensions */
780   n_B = pcis->n_B; n_D = pcis->n - n_B;
781   /* get vertex indices from constraint matrix */
782   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
783   /* Set number of constraints */
784   n_constraints = pcbddc->local_primal_size-n_vertices;
785   /* create auxiliary bitmask */
786   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
787   for (i=0;i<n_vertices;i++) {
788     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
789   }
790   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
791   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
792   for (i=0, n_R=0; i<pcis->n; i++) {
793     if (!PetscBTLookup(bitmask,i)) {
794       idx_R_local[n_R] = i;
795       n_R++;
796     }
797   }
798   ierr = PetscFree(vertices);CHKERRQ(ierr);
799   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
800 
801   /* print some info if requested */
802   if (pcbddc->dbg_flag) {
803     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
806     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
807     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
808     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
809     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
810   }
811 
812   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
813   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
814   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
815   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816   for (i=0; i<n_D; i++) {
817     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
818   }
819   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820   for (i=0, j=0; i<n_R; i++) {
821     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
822       aux_array1[j++] = i;
823     }
824   }
825   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
826   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
827   for (i=0, j=0; i<n_B; i++) {
828     if (!PetscBTLookup(bitmask,is_indices[i])) {
829       aux_array2[j++] = i;
830     }
831   }
832   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
834   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
835   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
836   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
837 
838   if (pcbddc->switch_static || pcbddc->dbg_flag) {
839     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
840     for (i=0, j=0; i<n_R; i++) {
841       if (PetscBTLookup(bitmask,idx_R_local[i])) {
842         aux_array1[j++] = i;
843       }
844     }
845     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
846     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
847     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   }
849   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "PCBDDCSetUseExactDirichlet"
855 PetscErrorCode PCBDDCSetUseExactDirichlet(PC pc,PetscBool use)
856 {
857   PC_BDDC  *pcbddc = (PC_BDDC*)pc->data;
858 
859   PetscFunctionBegin;
860   pcbddc->use_exact_dirichlet=use;
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
866 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
867 {
868   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
869   PC_IS          *pcis = (PC_IS*)pc->data;
870   PC             pc_temp;
871   Mat            A_RR;
872   Vec            vec1,vec2,vec3;
873   MatStructure   matstruct;
874   PetscScalar    m_one = -1.0;
875   PetscReal      value;
876   PetscInt       n_D,n_R,use_exact,use_exact_reduced;
877   PetscErrorCode ierr;
878 
879   PetscFunctionBegin;
880   /* Creating PC contexts for local Dirichlet and Neumann problems */
881   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
882 
883   /* DIRICHLET PROBLEM */
884   /* Matrix for Dirichlet problem is pcis->A_II */
885   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
886   if (!pcbddc->ksp_D) { /* create object if not yet build */
887     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
888     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
889     /* default */
890     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
891     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,"dirichlet_");CHKERRQ(ierr);
892     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
893     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
894     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
895   }
896   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
897   /* Allow user's customization */
898   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
899   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
900   if (!n_D) {
901     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
902     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
903   }
904   /* Set Up KSP for Dirichlet problem of BDDC */
905   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
906   /* set ksp_D into pcis data */
907   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
908   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
909   pcis->ksp_D = pcbddc->ksp_D;
910 
911   /* NEUMANN PROBLEM */
912   /* Matrix for Neumann problem is A_RR -> we need to create it */
913   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
914   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
915   if (!pcbddc->ksp_R) { /* create object if not yet build */
916     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
917     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
918     /* default */
919     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
920     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,"neumann_");CHKERRQ(ierr);
921     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
922     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
923     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
924   }
925   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
926   /* Allow user's customization */
927   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
928   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
929   if (!n_R) {
930     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
931     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
932   }
933   /* Set Up KSP for Neumann problem of BDDC */
934   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
935 
936   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
937 
938   /* Dirichlet */
939   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
940   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
941   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
942   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
943   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
944   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
945   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
946   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
947   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
948   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
949   /* need to be adapted? */
950   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
951   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
952   ierr = PCBDDCSetUseExactDirichlet(pc,(PetscBool)use_exact_reduced);CHKERRQ(ierr);
953   /* print info */
954   if (pcbddc->dbg_flag) {
955     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
956     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
957     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
958     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
959     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
960   }
961   if (n_D && pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
962     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
963   }
964 
965   /* Neumann */
966   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
967   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
968   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
969   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
970   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
971   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
972   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
973   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
974   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
975   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
976   /* need to be adapted? */
977   use_exact = (PetscAbsReal(value) > 1.e-4 ? 0 : 1);
978   if (PetscAbsReal(value) > 1.e-4) use_exact = 0;
979   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_INT,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
980   /* print info */
981   if (pcbddc->dbg_flag) {
982     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for  Neumann  solve = % 1.14e \n",PetscGlobalRank,value);CHKERRQ(ierr);
983     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
984   }
985   if (n_R && pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
986     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
987   }
988 
989   /* free Neumann problem's matrix */
990   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
996 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
997 {
998   PetscErrorCode ierr;
999   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1000 
1001   PetscFunctionBegin;
1002   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1003   if (pcbddc->local_auxmat1) {
1004     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1005     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1012 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1013 {
1014   PetscErrorCode ierr;
1015   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1016   PC_IS*            pcis = (PC_IS*)  (pc->data);
1017   const PetscScalar zero = 0.0;
1018 
1019   PetscFunctionBegin;
1020   /* Application of PHI^T (or PSI^T)  */
1021   if (pcbddc->coarse_psi_B) {
1022     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1023     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1024   } else {
1025     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1026     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1027   }
1028   /* Scatter data of coarse_rhs */
1029   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1030   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031 
1032   /* Local solution on R nodes */
1033   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1034   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1035   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1036   if (pcbddc->switch_static) {
1037     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1038     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1039   }
1040   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1041   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1042   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1044   if (pcbddc->switch_static) {
1045     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1047   }
1048 
1049   /* Coarse solution */
1050   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1051   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1052     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1053   }
1054   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1055   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056 
1057   /* Sum contributions from two levels */
1058   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1059   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1065 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1066 {
1067   PetscErrorCode ierr;
1068   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1069 
1070   PetscFunctionBegin;
1071   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1077 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1078 {
1079   PetscErrorCode ierr;
1080   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1081 
1082   PetscFunctionBegin;
1083   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 /* uncomment for testing purposes */
1088 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1089 #undef __FUNCT__
1090 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1091 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1092 {
1093   PetscErrorCode    ierr;
1094   PC_IS*            pcis = (PC_IS*)(pc->data);
1095   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1096   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1097   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1098   MatType           impMatType=MATSEQAIJ;
1099   /* one and zero */
1100   PetscScalar       one=1.0,zero=0.0;
1101   /* space to store constraints and their local indices */
1102   PetscScalar       *temp_quadrature_constraint;
1103   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1104   /* iterators */
1105   PetscInt          i,j,k,total_counts,temp_start_ptr;
1106   /* stuff to store connected components stored in pcbddc->mat_graph */
1107   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1108   PetscInt          n_ISForFaces,n_ISForEdges;
1109   /* near null space stuff */
1110   MatNullSpace      nearnullsp;
1111   const Vec         *nearnullvecs;
1112   Vec               *localnearnullsp;
1113   PetscBool         nnsp_has_cnst;
1114   PetscInt          nnsp_size;
1115   PetscScalar       *array;
1116   /* BLAS integers */
1117   PetscBLASInt      lwork,lierr;
1118   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1119   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1120   /* LAPACK working arrays for SVD or POD */
1121   PetscBool         skip_lapack;
1122   PetscScalar       *work;
1123   PetscReal         *singular_vals;
1124 #if defined(PETSC_USE_COMPLEX)
1125   PetscReal         *rwork;
1126 #endif
1127 #if defined(PETSC_MISSING_LAPACK_GESVD)
1128   PetscBLASInt      Blas_one_2=1;
1129   PetscScalar       *temp_basis,*correlation_mat;
1130 #else
1131   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1132   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1133 #endif
1134   /* change of basis */
1135   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1136   PetscBool         boolforchange;
1137   PetscBT           touched,change_basis;
1138   /* auxiliary stuff */
1139   PetscInt          *nnz,*is_indices,*local_to_B;
1140   /* some quantities */
1141   PetscInt          n_vertices,total_primal_vertices;
1142   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1143 
1144 
1145   PetscFunctionBegin;
1146   /* Get index sets for faces, edges and vertices from graph */
1147   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1148     pcbddc->use_vertices = PETSC_TRUE;
1149   }
1150   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1151   /* print some info */
1152   if (pcbddc->dbg_flag) {
1153     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1154     i = 0;
1155     if (ISForVertices) {
1156       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1157     }
1158     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1159     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1160     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1161     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1162   }
1163   /* check if near null space is attached to global mat */
1164   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1165   if (nearnullsp) {
1166     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1167   } else { /* if near null space is not provided BDDC uses constants by default */
1168     nnsp_size = 0;
1169     nnsp_has_cnst = PETSC_TRUE;
1170   }
1171   /* get max number of constraints on a single cc */
1172   max_constraints = nnsp_size;
1173   if (nnsp_has_cnst) max_constraints++;
1174 
1175   /*
1176        Evaluate maximum storage size needed by the procedure
1177        - temp_indices will contain start index of each constraint stored as follows
1178        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1179        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1180        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1181                                                                                                                                                          */
1182   total_counts = n_ISForFaces+n_ISForEdges;
1183   total_counts *= max_constraints;
1184   n_vertices = 0;
1185   if (ISForVertices) {
1186     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1187   }
1188   total_counts += n_vertices;
1189   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1190   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1191   total_counts = 0;
1192   max_size_of_constraint = 0;
1193   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1194     if (i<n_ISForEdges) {
1195       used_IS = &ISForEdges[i];
1196     } else {
1197       used_IS = &ISForFaces[i-n_ISForEdges];
1198     }
1199     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1200     total_counts += j;
1201     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1202   }
1203   total_counts *= max_constraints;
1204   total_counts += n_vertices;
1205   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1206   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1207   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1208   /* local to boundary numbering */
1209   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1210   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1211   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1212   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1213   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1214   /* get local part of global near null space vectors */
1215   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1216   for (k=0;k<nnsp_size;k++) {
1217     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1218     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1220   }
1221 
1222   /* whether or not to skip lapack calls */
1223   skip_lapack = PETSC_TRUE;
1224   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1225 
1226   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1227   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1228     PetscScalar temp_work;
1229 #if defined(PETSC_MISSING_LAPACK_GESVD)
1230     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1231     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1232     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1233     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1234 #if defined(PETSC_USE_COMPLEX)
1235     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1236 #endif
1237     /* now we evaluate the optimal workspace using query with lwork=-1 */
1238     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1239     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1240     lwork = -1;
1241     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1242 #if !defined(PETSC_USE_COMPLEX)
1243     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1244 #else
1245     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1246 #endif
1247     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1248     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1249 #else /* on missing GESVD */
1250     /* SVD */
1251     PetscInt max_n,min_n;
1252     max_n = max_size_of_constraint;
1253     min_n = max_constraints;
1254     if (max_size_of_constraint < max_constraints) {
1255       min_n = max_size_of_constraint;
1256       max_n = max_constraints;
1257     }
1258     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1259 #if defined(PETSC_USE_COMPLEX)
1260     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1261 #endif
1262     /* now we evaluate the optimal workspace using query with lwork=-1 */
1263     lwork = -1;
1264     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1265     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1266     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1267     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1268 #if !defined(PETSC_USE_COMPLEX)
1269     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1270 #else
1271     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1272 #endif
1273     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1274     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1275 #endif /* on missing GESVD */
1276     /* Allocate optimal workspace */
1277     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1278     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1279   }
1280   /* Now we can loop on constraining sets */
1281   total_counts = 0;
1282   temp_indices[0] = 0;
1283   /* vertices */
1284   if (ISForVertices) {
1285     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1286     if (nnsp_has_cnst) { /* consider all vertices */
1287       for (i=0;i<n_vertices;i++) {
1288         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1289         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1290         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1291         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1292         total_counts++;
1293       }
1294     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1295       PetscBool used_vertex;
1296       for (i=0;i<n_vertices;i++) {
1297         used_vertex = PETSC_FALSE;
1298         k = 0;
1299         while (!used_vertex && k<nnsp_size) {
1300           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1301           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1302             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1303             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1304             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1305             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1306             total_counts++;
1307             used_vertex = PETSC_TRUE;
1308           }
1309           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1310           k++;
1311         }
1312       }
1313     }
1314     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1315     n_vertices = total_counts;
1316   }
1317 
1318   /* edges and faces */
1319   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1320     if (i<n_ISForEdges) {
1321       used_IS = &ISForEdges[i];
1322       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1323     } else {
1324       used_IS = &ISForFaces[i-n_ISForEdges];
1325       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1326     }
1327     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1328     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1329     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1330     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1331     /* change of basis should not be performed on local periodic nodes */
1332     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1333     if (nnsp_has_cnst) {
1334       PetscScalar quad_value;
1335       temp_constraints++;
1336       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1337       for (j=0;j<size_of_constraint;j++) {
1338         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1339         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1340         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1341       }
1342       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1343       if (boolforchange) {
1344         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1345       }
1346       total_counts++;
1347     }
1348     for (k=0;k<nnsp_size;k++) {
1349       PetscReal real_value;
1350       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1351       for (j=0;j<size_of_constraint;j++) {
1352         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1353         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1354         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1355       }
1356       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1357       /* check if array is null on the connected component */
1358       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1359       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1360       if (real_value > 0.0) { /* keep indices and values */
1361         temp_constraints++;
1362         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1363         if (boolforchange) {
1364           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1365         }
1366         total_counts++;
1367       }
1368     }
1369     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1370     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1371     if (!pcbddc->use_nnsp_true) {
1372       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1373 
1374 #if defined(PETSC_MISSING_LAPACK_GESVD)
1375       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1376          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1377          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1378             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1379             from that computed using LAPACKgesvd
1380          -> This is due to a different computation of eigenvectors in LAPACKheev
1381          -> The quality of the POD-computed basis will be the same */
1382       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1383       /* Store upper triangular part of correlation matrix */
1384       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1385       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1386       for (j=0;j<temp_constraints;j++) {
1387         for (k=0;k<j+1;k++) {
1388           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1389         }
1390       }
1391       /* compute eigenvalues and eigenvectors of correlation matrix */
1392       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1393       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1394 #if !defined(PETSC_USE_COMPLEX)
1395       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1396 #else
1397       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1398 #endif
1399       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1400       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1401       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1402       j=0;
1403       while (j < temp_constraints && singular_vals[j] < tol) j++;
1404       total_counts=total_counts-j;
1405       /* scale and copy POD basis into used quadrature memory */
1406       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1407       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1408       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1409       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1410       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1411       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1412       if (j<temp_constraints) {
1413         PetscInt ii;
1414         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1415         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1416         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1417         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1418         for (k=0;k<temp_constraints-j;k++) {
1419           for (ii=0;ii<size_of_constraint;ii++) {
1420             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1421           }
1422         }
1423       }
1424 #else  /* on missing GESVD */
1425       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1426       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1427       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1428       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1429 #if !defined(PETSC_USE_COMPLEX)
1430       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1431 #else
1432       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1433 #endif
1434       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1435       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1436       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1437       k = temp_constraints;
1438       if (k > size_of_constraint) k = size_of_constraint;
1439       j = 0;
1440       while (j < k && singular_vals[k-j-1] < tol) j++;
1441       total_counts = total_counts-temp_constraints+k-j;
1442 #endif /* on missing GESVD */
1443     }
1444   }
1445   /* free index sets of faces, edges and vertices */
1446   for (i=0;i<n_ISForFaces;i++) {
1447     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1448   }
1449   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1450   for (i=0;i<n_ISForEdges;i++) {
1451     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1452   }
1453   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1454   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1455 
1456   /* free workspace */
1457   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1458     ierr = PetscFree(work);CHKERRQ(ierr);
1459 #if defined(PETSC_USE_COMPLEX)
1460     ierr = PetscFree(rwork);CHKERRQ(ierr);
1461 #endif
1462     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1463 #if defined(PETSC_MISSING_LAPACK_GESVD)
1464     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1465     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1466 #endif
1467   }
1468   for (k=0;k<nnsp_size;k++) {
1469     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1470   }
1471   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1472 
1473   /* set quantities in pcbddc data structure */
1474   /* n_vertices defines the number of subdomain corners in the primal space */
1475   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1476   pcbddc->local_primal_size = total_counts;
1477   pcbddc->n_vertices = n_vertices;
1478   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1479 
1480   /* Create constraint matrix */
1481   /* The constraint matrix is used to compute the l2g map of primal dofs */
1482   /* so we need to set it up properly either with or without change of basis */
1483   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1484   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1485   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1486   /* array to compute a local numbering of constraints : vertices first then constraints */
1487   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1488   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1489   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1490   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1491   /* auxiliary stuff for basis change */
1492   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1493   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1494 
1495   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1496   total_primal_vertices=0;
1497   for (i=0;i<pcbddc->local_primal_size;i++) {
1498     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1499     if (size_of_constraint == 1) {
1500       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1501       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1502       aux_primal_minloc[total_primal_vertices]=0;
1503       total_primal_vertices++;
1504     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1505       PetscInt min_loc,min_index;
1506       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1507       /* find first untouched local node */
1508       k = 0;
1509       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1510       min_index = global_indices[k];
1511       min_loc = k;
1512       /* search the minimum among global nodes already untouched on the cc */
1513       for (k=1;k<size_of_constraint;k++) {
1514         /* there can be more than one constraint on a single connected component */
1515         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1516           min_index = global_indices[k];
1517           min_loc = k;
1518         }
1519       }
1520       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1521       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1522       aux_primal_minloc[total_primal_vertices]=min_loc;
1523       total_primal_vertices++;
1524     }
1525   }
1526   /* free workspace */
1527   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1528   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1529   /* permute indices in order to have a sorted set of vertices */
1530   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1531 
1532   /* nonzero structure of constraint matrix */
1533   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1534   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1535   j=total_primal_vertices;
1536   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1537     if (!PetscBTLookup(change_basis,i)) {
1538       nnz[j]=temp_indices[i+1]-temp_indices[i];
1539       j++;
1540     }
1541   }
1542   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1543   ierr = PetscFree(nnz);CHKERRQ(ierr);
1544   /* set values in constraint matrix */
1545   for (i=0;i<total_primal_vertices;i++) {
1546     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1547   }
1548   total_counts = total_primal_vertices;
1549   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1550     if (!PetscBTLookup(change_basis,i)) {
1551       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1552       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1553       total_counts++;
1554     }
1555   }
1556   /* assembling */
1557   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1558   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1559   /*
1560   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1561   */
1562   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1563   if (pcbddc->use_change_of_basis) {
1564     PetscBool qr_needed = PETSC_FALSE;
1565     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1566     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1567     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1568     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1569     /* work arrays */
1570     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1571     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1572     /* nonzeros per row */
1573     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1574       if (PetscBTLookup(change_basis,i)) {
1575         qr_needed = PETSC_TRUE;
1576         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1577         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1578       }
1579     }
1580     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1581     ierr = PetscFree(nnz);CHKERRQ(ierr);
1582     /* Set initial identity in the matrix */
1583     for (i=0;i<pcis->n_B;i++) {
1584       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1585     }
1586 
1587     /* Now we loop on the constraints which need a change of basis */
1588     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1589     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1590     /* Change of basis matrix T computed via QR decomposition of constraints */
1591     if (qr_needed) {
1592       /* dual and primal dofs on a single cc */
1593       PetscInt     dual_dofs,primal_dofs;
1594       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1595       PetscInt     primal_counter;
1596       /* working stuff for GEQRF */
1597       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1598       PetscBLASInt lqr_work;
1599       /* working stuff for UNGQR */
1600       PetscScalar  *gqr_work,lgqr_work_t;
1601       PetscBLASInt lgqr_work;
1602       /* working stuff for TRTRS */
1603       PetscScalar  *trs_rhs;
1604       PetscBLASInt Blas_NRHS;
1605       /* pointers for values insertion into change of basis matrix */
1606       PetscInt     *start_rows,*start_cols;
1607       PetscScalar  *start_vals;
1608       /* working stuff for values insertion */
1609       PetscBT      is_primal;
1610 
1611       /* space to store Q */
1612       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1613       /* first we issue queries for optimal work */
1614       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1615       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1616       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1617       lqr_work = -1;
1618       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1619       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1620       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1621       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1622       lgqr_work = -1;
1623       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1624       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1625       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1626       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1627       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1628       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1629       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1630       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1631       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1632       /* array to store scaling factors for reflectors */
1633       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1634       /* array to store rhs and solution of triangular solver */
1635       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1636       /* array to store whether a node is primal or not */
1637       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1638       for (i=0;i<total_primal_vertices;i++) {
1639         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1640       }
1641 
1642       /* allocating workspace for check */
1643       if (pcbddc->dbg_flag) {
1644         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1645         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1646         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1647       }
1648 
1649       /* loop on constraints and see whether or not they need a change of basis */
1650       /* -> using implicit ordering contained in temp_indices data */
1651       total_counts = pcbddc->n_vertices;
1652       primal_counter = total_counts;
1653       while (total_counts<pcbddc->local_primal_size) {
1654         primal_dofs = 1;
1655         if (PetscBTLookup(change_basis,total_counts)) {
1656           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1657           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1658             primal_dofs++;
1659           }
1660           /* get constraint info */
1661           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1662           dual_dofs = size_of_constraint-primal_dofs;
1663 
1664           /* copy quadrature constraints for change of basis check */
1665           if (pcbddc->dbg_flag) {
1666             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1667             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1668           }
1669 
1670           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1671           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1672 
1673           /* compute QR decomposition of constraints */
1674           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1675           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1676           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1677           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1678           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1679           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1680           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1681 
1682           /* explictly compute R^-T */
1683           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1684           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1685           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1686           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1687           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1688           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1689           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1690           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1691           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1692           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1693 
1694           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1695           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1697           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1698           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1699           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1700           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1701           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1702           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1703 
1704           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1705              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1706              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1709           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1710           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1711           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1712           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1713           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1714           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1715           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1716           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1717 
1718           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1719           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1720           /* insert cols for primal dofs */
1721           for (j=0;j<primal_dofs;j++) {
1722             start_vals = &qr_basis[j*size_of_constraint];
1723             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1724             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1725           }
1726           /* insert cols for dual dofs */
1727           for (j=0,k=0;j<dual_dofs;k++) {
1728             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1729               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1730               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1731               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1732               j++;
1733             }
1734           }
1735 
1736           /* check change of basis */
1737           if (pcbddc->dbg_flag) {
1738             PetscInt   ii,jj;
1739             PetscBool valid_qr=PETSC_TRUE;
1740             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1741             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1742             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1743             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1744             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1745             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1746             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1747             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1748             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1749             for (jj=0;jj<size_of_constraint;jj++) {
1750               for (ii=0;ii<primal_dofs;ii++) {
1751                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1752                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1753               }
1754             }
1755             if (!valid_qr) {
1756               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1757               for (jj=0;jj<size_of_constraint;jj++) {
1758                 for (ii=0;ii<primal_dofs;ii++) {
1759                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1760                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1761                   }
1762                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1763                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1764                   }
1765                 }
1766               }
1767             } else {
1768               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1769             }
1770           }
1771           /* increment primal counter */
1772           primal_counter += primal_dofs;
1773         } else {
1774           if (pcbddc->dbg_flag) {
1775             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1776           }
1777         }
1778         /* increment constraint counter total_counts */
1779         total_counts += primal_dofs;
1780       }
1781       if (pcbddc->dbg_flag) {
1782         ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1783         ierr = PetscFree(work);CHKERRQ(ierr);
1784       }
1785       /* free workspace */
1786       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1787       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1788       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1789       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1790       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1791       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1792     }
1793     /* assembling */
1794     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     /*
1797     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1798     */
1799   }
1800   /* free workspace */
1801   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1802   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1803   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1804   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1805   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1806   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1807   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1808   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1814 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1815 {
1816   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1817   PC_IS       *pcis = (PC_IS*)pc->data;
1818   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1819   PetscInt    bs,ierr,i,vertex_size;
1820   PetscViewer viewer=pcbddc->dbg_viewer;
1821 
1822   PetscFunctionBegin;
1823   /* Init local Graph struct */
1824   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1825 
1826   /* Check validity of the csr graph passed in by the user */
1827   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1828     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1829   }
1830   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1831   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1832     Mat mat_adj;
1833     const PetscInt *xadj,*adjncy;
1834     PetscBool flg_row=PETSC_TRUE;
1835 
1836     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1837     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1838     if (!flg_row) {
1839       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1840     }
1841     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1842     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1843     if (!flg_row) {
1844       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1845     }
1846     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1847   }
1848 
1849   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1850   vertex_size = 1;
1851   if (!pcbddc->n_ISForDofs) {
1852     IS *custom_ISForDofs;
1853 
1854     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1855     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1856     for (i=0;i<bs;i++) {
1857       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1858     }
1859     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1860     /* remove my references to IS objects */
1861     for (i=0;i<bs;i++) {
1862       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1863     }
1864     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1865   } else { /* mat block size as vertex size (used for elasticity) */
1866     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1867   }
1868 
1869   /* Setup of Graph */
1870   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1871 
1872   /* Graph's connected components analysis */
1873   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1874 
1875   /* print some info to stdout */
1876   if (pcbddc->dbg_flag) {
1877     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1878   }
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1884 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1885 {
1886   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1887   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   n = 0;
1892   vertices = 0;
1893   if (pcbddc->ConstraintMatrix) {
1894     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1895     for (i=0;i<local_primal_size;i++) {
1896       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1897       if (size_of_constraint == 1) n++;
1898       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1899     }
1900     if (vertices_idx) {
1901       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1902       n = 0;
1903       for (i=0;i<local_primal_size;i++) {
1904         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1905         if (size_of_constraint == 1) {
1906           vertices[n++]=row_cmat_indices[0];
1907         }
1908         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1909       }
1910     }
1911   }
1912   *n_vertices = n;
1913   if (vertices_idx) *vertices_idx = vertices;
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1919 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1920 {
1921   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1922   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1923   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1924   PetscBT        touched;
1925   PetscErrorCode ierr;
1926 
1927     /* This function assumes that the number of local constraints per connected component
1928        is not greater than the number of nodes defined for the connected component
1929        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1930   PetscFunctionBegin;
1931   n = 0;
1932   constraints_index = 0;
1933   if (pcbddc->ConstraintMatrix) {
1934     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1935     max_size_of_constraint = 0;
1936     for (i=0;i<local_primal_size;i++) {
1937       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1938       if (size_of_constraint > 1) {
1939         n++;
1940       }
1941       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1942       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1943     }
1944     if (constraints_idx) {
1945       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1946       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1947       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1948       n = 0;
1949       for (i=0;i<local_primal_size;i++) {
1950         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1951         if (size_of_constraint > 1) {
1952           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1953           /* find first untouched local node */
1954           j = 0;
1955           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1956           min_index = row_cmat_global_indices[j];
1957           min_loc = j;
1958           /* search the minimum among nodes not yet touched on the connected component
1959              since there can be more than one constraint on a single cc */
1960           for (j=1;j<size_of_constraint;j++) {
1961             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1962               min_index = row_cmat_global_indices[j];
1963               min_loc = j;
1964             }
1965           }
1966           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1967           constraints_index[n++] = row_cmat_indices[min_loc];
1968         }
1969         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1970       }
1971       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1972       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1973     }
1974   }
1975   *n_constraints = n;
1976   if (constraints_idx) *constraints_idx = constraints_index;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 /* the next two functions has been adapted from pcis.c */
1981 #undef __FUNCT__
1982 #define __FUNCT__ "PCBDDCApplySchur"
1983 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
1984 {
1985   PetscErrorCode ierr;
1986   PC_IS          *pcis = (PC_IS*)(pc->data);
1987 
1988   PetscFunctionBegin;
1989   if (!vec2_B) { vec2_B = v; }
1990   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
1991   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
1992   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
1993   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
1994   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2000 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2001 {
2002   PetscErrorCode ierr;
2003   PC_IS          *pcis = (PC_IS*)(pc->data);
2004 
2005   PetscFunctionBegin;
2006   if (!vec2_B) { vec2_B = v; }
2007   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2008   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2009   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2010   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2011   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 #undef __FUNCT__
2016 #define __FUNCT__ "PCBDDCSubsetNumbering"
2017 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2018 {
2019   Vec            local_vec,global_vec;
2020   IS             seqis,paris;
2021   VecScatter     scatter_ctx;
2022   PetscScalar    *array;
2023   PetscInt       *temp_global_dofs;
2024   PetscScalar    globalsum;
2025   PetscInt       i,j,s;
2026   PetscInt       nlocals,first_index,old_index,max_local;
2027   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2028   PetscMPIInt    *dof_sizes,*dof_displs;
2029   PetscBool      first_found;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   /* mpi buffers */
2034   MPI_Comm_size(comm,&size_prec_comm);
2035   MPI_Comm_rank(comm,&rank_prec_comm);
2036   j = ( !rank_prec_comm ? size_prec_comm : 0);
2037   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2038   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2039   /* get maximum size of subset */
2040   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2041   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2042   max_local = 0;
2043   if (n_local_dofs) {
2044     max_local = temp_global_dofs[0];
2045     for (i=1;i<n_local_dofs;i++) {
2046       if (max_local < temp_global_dofs[i] ) {
2047         max_local = temp_global_dofs[i];
2048       }
2049     }
2050   }
2051   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2052   max_global++;
2053   max_local = 0;
2054   if (n_local_dofs) {
2055     max_local = local_dofs[0];
2056     for (i=1;i<n_local_dofs;i++) {
2057       if (max_local < local_dofs[i] ) {
2058         max_local = local_dofs[i];
2059       }
2060     }
2061   }
2062   max_local++;
2063   /* allocate workspace */
2064   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2065   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2066   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2067   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2068   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2069   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2070   /* create scatter */
2071   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2072   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2073   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2074   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2075   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2076   /* init array */
2077   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2078   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2079   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2080   if (local_dofs_mult) {
2081     for (i=0;i<n_local_dofs;i++) {
2082       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2083     }
2084   } else {
2085     for (i=0;i<n_local_dofs;i++) {
2086       array[local_dofs[i]]=1.0;
2087     }
2088   }
2089   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2090   /* scatter into global vec and get total number of global dofs */
2091   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2092   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2093   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2094   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2095   /* Fill global_vec with cumulative function for global numbering */
2096   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2097   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2098   nlocals = 0;
2099   first_index = -1;
2100   first_found = PETSC_FALSE;
2101   for (i=0;i<s;i++) {
2102     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2103       first_found = PETSC_TRUE;
2104       first_index = i;
2105     }
2106     nlocals += (PetscInt)PetscRealPart(array[i]);
2107   }
2108   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2109   if (!rank_prec_comm) {
2110     dof_displs[0]=0;
2111     for (i=1;i<size_prec_comm;i++) {
2112       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2113     }
2114   }
2115   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2116   if (first_found) {
2117     array[first_index] += (PetscScalar)nlocals;
2118     old_index = first_index;
2119     for (i=first_index+1;i<s;i++) {
2120       if (PetscRealPart(array[i]) > 0.0) {
2121         array[i] += array[old_index];
2122         old_index = i;
2123       }
2124     }
2125   }
2126   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2127   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2128   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2129   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2130   /* get global ordering of local dofs */
2131   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2132   if (local_dofs_mult) {
2133     for (i=0;i<n_local_dofs;i++) {
2134       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2135     }
2136   } else {
2137     for (i=0;i<n_local_dofs;i++) {
2138       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2139     }
2140   }
2141   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2142   /* free workspace */
2143   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2144   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2145   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2146   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2147   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2148   /* return pointer to global ordering of local dofs */
2149   *global_numbering_subset = temp_global_dofs;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2155 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2156 {
2157   PetscInt       i,j;
2158   PetscScalar    *alphas;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   /* this implements stabilized Gram-Schmidt */
2163   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2164   for (i=0;i<n;i++) {
2165     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2166     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2167     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2168   }
2169   ierr = PetscFree(alphas);CHKERRQ(ierr);
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatConvert_IS_AIJ"
2176 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2177 {
2178   Mat new_mat;
2179   Mat_IS *matis = (Mat_IS*)(mat->data);
2180   /* info on mat */
2181   PetscInt bs,rows,cols;
2182   PetscInt lrows,lcols;
2183   PetscInt local_rows,local_cols;
2184   PetscMPIInt nsubdomains;
2185   /* preallocation */
2186   Vec vec_dnz,vec_onz;
2187   PetscScalar *my_dnz,*my_onz,*array;
2188   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2189   PetscInt  index_row,index_col,owner;
2190   PetscInt  *local_indices,*global_indices;
2191   /* work */
2192   PetscInt i,j;
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   /* CHECKS */
2197   /* get info from mat */
2198   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2199   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2200   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2201   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2202 
2203   /* MAT_INITIAL_MATRIX */
2204   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2205   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2206   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2207   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2208   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2209   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2210 
2211   /*
2212     preallocation
2213   */
2214 
2215   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2216   /*
2217      Some vectors are needed to sum up properly on shared interface dofs.
2218      Note that preallocation is not exact, since it overestimates nonzeros
2219   */
2220 /*
2221   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2222   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2223   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2224   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2225 */
2226   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2227   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2228   /* All processes need to compute entire row ownership */
2229   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2230   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2231   for (i=0;i<nsubdomains;i++) {
2232     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2233       row_ownership[j]=i;
2234     }
2235   }
2236   /* map indices of local mat to global values */
2237   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2238   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2239   for (i=0;i<local_rows;i++) local_indices[i]=i;
2240   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2241   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2242 
2243   /*
2244      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2245      then, they will be summed up properly. This way, preallocation is always sufficient
2246   */
2247   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2248   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2249   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2250   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2251   for (i=0;i<local_rows;i++) {
2252     index_row = global_indices[i];
2253     for (j=i;j<local_rows;j++) {
2254       owner = row_ownership[index_row];
2255       index_col = global_indices[j];
2256       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2257         my_dnz[i] += 1.0;
2258       } else { /* offdiag block */
2259         my_onz[i] += 1.0;
2260       }
2261       /* same as before, interchanging rows and cols */
2262       if (i != j) {
2263         owner = row_ownership[index_col];
2264         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2265           my_dnz[j] += 1.0;
2266         } else {
2267           my_onz[j] += 1.0;
2268         }
2269       }
2270     }
2271   }
2272   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2273   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2274   if (local_rows) { /* multilevel guard */
2275     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2276     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2277   }
2278   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2279   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2280   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2281   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2282   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2283   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2284   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2285 
2286   /* set computed preallocation in dnz and onz */
2287   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2288   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2289   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2290   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2291   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2292   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2293   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2294   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2295 
2296   /* Resize preallocation if overestimated */
2297   for (i=0;i<lrows;i++) {
2298     dnz[i] = PetscMin(dnz[i],lcols);
2299     onz[i] = PetscMin(onz[i],cols-lcols);
2300   }
2301   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2302   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2303 
2304   /*
2305     Set values. Very Basic.
2306   */
2307   for (i=0;i<local_rows;i++) {
2308     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2309     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2310     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2311     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2312     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2313   }
2314   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2315   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317 
2318   /* get back mat */
2319   *M = new_mat;
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatISSubassemble_Private"
2325 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2326 {
2327   Mat             subdomain_adj;
2328   IS              new_ranks,ranks_send_to;
2329   MatPartitioning partitioner;
2330   Mat_IS          *matis;
2331   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2332   PetscInt        prank;
2333   PetscMPIInt     size,rank,color;
2334   PetscInt        *xadj,*adjncy,*oldranks;
2335   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2336   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2337   PetscErrorCode  ierr;
2338   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2339   PetscSubcomm    subcomm;
2340 
2341   PetscFunctionBegin;
2342   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2343   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2344   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2345 
2346   /* Get info on mapping */
2347   matis = (Mat_IS*)(mat->data);
2348   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2349   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2350 
2351   /* build local CSR graph of subdomains' connectivity */
2352   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2353   xadj[0] = 0;
2354   xadj[1] = PetscMax(n_neighs-1,0);
2355   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2356   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2357 
2358   if (threshold) {
2359     PetscInt* count,min_threshold;
2360     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2361     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2362     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2363       for (j=0;j<n_shared[i];j++) {
2364         count[shared[i][j]] += 1;
2365       }
2366     }
2367     /* adapt threshold since we dont want to lose significant connections */
2368     min_threshold = n_neighs;
2369     for (i=1;i<n_neighs;i++) {
2370       for (j=0;j<n_shared[i];j++) {
2371         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2372       }
2373     }
2374     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2375     threshold = PetscMax(min_threshold+1,threshold);
2376     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2377     xadj[1] = 0;
2378     for (i=1;i<n_neighs;i++) {
2379       for (j=0;j<n_shared[i];j++) {
2380         if (count[shared[i][j]] < threshold) {
2381           adjncy[xadj[1]] = neighs[i];
2382           adjncy_wgt[xadj[1]] = n_shared[i];
2383           xadj[1]++;
2384           break;
2385         }
2386       }
2387     }
2388     ierr = PetscFree(count);CHKERRQ(ierr);
2389   } else {
2390     if (xadj[1]) {
2391       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2392       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2393     }
2394   }
2395   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2396   if (use_square) {
2397     for (i=0;i<xadj[1];i++) {
2398       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2399     }
2400   }
2401   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2402 
2403   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2404 
2405   /*
2406     Restrict work on active processes only.
2407   */
2408   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2409   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2410   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2411   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2412   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2413   if (color) {
2414     ierr = PetscFree(xadj);CHKERRQ(ierr);
2415     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2416     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2417   } else {
2418     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2419     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2420     prank = rank;
2421     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2422     for (i=0;i<size;i++) {
2423       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2424     }
2425     for (i=0;i<xadj[1];i++) {
2426       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2427     }
2428     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2429     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2430     n_subdomains = (PetscInt)size;
2431     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2432 
2433     /* Partition */
2434     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2435     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2436     if (use_vwgt) {
2437       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2438       v_wgt[0] = local_size;
2439       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2440     }
2441     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2442     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2443     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2444     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2445     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2446     ierr = ISView(new_ranks,0);CHKERRQ(ierr);
2447 
2448     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2449     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2450     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2451     /* clean up */
2452     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2453     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2454     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2455     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2456   }
2457   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2458 
2459   /* assemble parallel IS for sends */
2460   i = 1;
2461   if (color) i=0;
2462   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2463   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2464 
2465   /* get back IS */
2466   *is_sends = ranks_send_to;
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatISSubassemble"
2474 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2475 {
2476   Mat                    local_mat,new_mat;
2477   Mat_IS                 *matis;
2478   IS                     is_sends_internal;
2479   PetscInt               rows,cols;
2480   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2481   PetscBool              ismatis,isdense;
2482   ISLocalToGlobalMapping l2gmap;
2483   PetscInt*              l2gmap_indices;
2484   const PetscInt*        is_indices;
2485   MatType                new_local_type;
2486   MatTypePrivate         new_local_type_private;
2487   /* buffers */
2488   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2489   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2490   /* MPI */
2491   MPI_Comm               comm;
2492   PetscMPIInt            n_sends,n_recvs,commsize;
2493   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2494   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2495   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2496   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2497   PetscErrorCode         ierr;
2498 
2499   PetscFunctionBegin;
2500   /* checks */
2501   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2502   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2503   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2504   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2505   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2506   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2507   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2508   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2509   PetscValidLogicalCollectiveInt(mat,bs,0);
2510   /* prepare IS for sending if not provided */
2511   if (!is_sends) {
2512     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2513     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2514   } else {
2515     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2516     is_sends_internal = is_sends;
2517   }
2518 
2519   /* get pointer of MATIS data */
2520   matis = (Mat_IS*)mat->data;
2521 
2522   /* get comm */
2523   comm = PetscObjectComm((PetscObject)mat);
2524 
2525   /* compute number of sends */
2526   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2527   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2528 
2529   /* compute number of receives */
2530   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2531   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2532   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2533   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2534   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2535   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2536   ierr = PetscFree(iflags);CHKERRQ(ierr);
2537 
2538   /* prepare send/receive buffers */
2539   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2540   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2541   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2542   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2543 
2544   /* Get data from local mat */
2545   if (!isdense) {
2546     /* TODO: See below some guidelines on how to prepare the local buffers */
2547     /*
2548        send_buffer_vals should contain the raw values of the local matrix
2549        send_buffer_idxs should contain:
2550        - MatType_PRIVATE type
2551        - PetscInt        size_of_l2gmap
2552        - PetscInt        global_row_indices[size_of_l2gmap]
2553        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2554     */
2555   } else {
2556     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2557     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2558     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2559     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2560     send_buffer_idxs[1] = i;
2561     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2562     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2563     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2564     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2565     for (i=0;i<n_sends;i++) {
2566       ilengths_vals[is_indices[i]] = len*len;
2567       ilengths_idxs[is_indices[i]] = len+2;
2568     }
2569   }
2570   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2571   buf_size_idxs = 0;
2572   buf_size_vals = 0;
2573   for (i=0;i<n_recvs;i++) {
2574     buf_size_idxs += (PetscInt)olengths_idxs[i];
2575     buf_size_vals += (PetscInt)olengths_vals[i];
2576   }
2577   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2578   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2579 
2580   /* get new tags for clean communications */
2581   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2582   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2583 
2584   /* allocate for requests */
2585   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2586   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2587   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2588   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2589 
2590   /* communications */
2591   ptr_idxs = recv_buffer_idxs;
2592   ptr_vals = recv_buffer_vals;
2593   for (i=0;i<n_recvs;i++) {
2594     source_dest = onodes[i];
2595     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2596     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2597     ptr_idxs += olengths_idxs[i];
2598     ptr_vals += olengths_vals[i];
2599   }
2600   for (i=0;i<n_sends;i++) {
2601     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2602     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2603     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2604   }
2605   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2606   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2607 
2608   /* assemble new l2g map */
2609   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2610   ptr_idxs = recv_buffer_idxs;
2611   buf_size_idxs = 0;
2612   for (i=0;i<n_recvs;i++) {
2613     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2614     ptr_idxs += olengths_idxs[i];
2615   }
2616   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2617   ptr_idxs = recv_buffer_idxs;
2618   buf_size_idxs = 0;
2619   for (i=0;i<n_recvs;i++) {
2620     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2621     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2622     ptr_idxs += olengths_idxs[i];
2623   }
2624   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2625   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2626   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2627 
2628   /* infer new local matrix type from received local matrices type */
2629   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2630   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2631   new_local_type_private = MATAIJ_PRIVATE;
2632   new_local_type = MATSEQAIJ;
2633   if (n_recvs) {
2634     new_local_type_private = send_buffer_idxs[0];
2635     ptr_idxs = recv_buffer_idxs;
2636     for (i=0;i<n_recvs;i++) {
2637       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2638         new_local_type_private = MATAIJ_PRIVATE;
2639         break;
2640       }
2641       ptr_idxs += olengths_idxs[i];
2642     }
2643     switch (new_local_type_private) {
2644       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2645         new_local_type = MATSEQAIJ;
2646         bs = 1;
2647         break;
2648       case MATAIJ_PRIVATE:
2649         new_local_type = MATSEQAIJ;
2650         bs = 1;
2651         break;
2652       case MATBAIJ_PRIVATE:
2653         new_local_type = MATSEQBAIJ;
2654         break;
2655       case MATSBAIJ_PRIVATE:
2656         new_local_type = MATSEQSBAIJ;
2657         break;
2658       default:
2659         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2660         break;
2661     }
2662   }
2663 
2664   /* create MATIS object */
2665   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2666   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2667   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2668   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2669   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2670   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2671 
2672   /* set values */
2673   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2674   ptr_vals = recv_buffer_vals;
2675   ptr_idxs = recv_buffer_idxs;
2676   for (i=0;i<n_recvs;i++) {
2677     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2678       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2679       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2680       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2681       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2682       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2683     }
2684     ptr_idxs += olengths_idxs[i];
2685     ptr_vals += olengths_vals[i];
2686   }
2687   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2688   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2689   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2690   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2691 
2692   { /* check */
2693     Vec       lvec,rvec;
2694     PetscReal infty_error;
2695 
2696     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2697     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2698     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2699     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2700     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2701     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2702     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2703     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2704     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2705   }
2706 
2707   /* free workspace */
2708   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2709   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2710   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2711   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2712   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2713   if (isdense) {
2714     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2715     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2716   } else {
2717     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2718   }
2719   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2720   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2721   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2722   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2723   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2724   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2725   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2726   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2727   ierr = PetscFree(onodes);CHKERRQ(ierr);
2728   /* get back new mat */
2729   *mat_n = new_mat;
2730   PetscFunctionReturn(0);
2731 }
2732 
2733 #undef __FUNCT__
2734 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2735 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2736 {
2737   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2738   PC_IS                  *pcis = (PC_IS*)pc->data;
2739   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2740   MatNullSpace           CoarseNullSpace=NULL;
2741   ISLocalToGlobalMapping coarse_islg;
2742   IS                     coarse_is;
2743   PetscInt               coarse_size,*local_primal_indices=NULL;
2744   PCType                 coarse_pc_type;
2745   KSPType                coarse_ksp_type;
2746   PetscBool              use_multilevel,setsym,issym;
2747   PetscErrorCode         ierr;
2748 
2749   PetscFunctionBegin;
2750   /* Assign global numbering to coarse dofs */
2751   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2752 
2753   /* adapt coarse problem type if needed */
2754   use_multilevel = PETSC_FALSE;
2755   if (pcbddc->current_level < pcbddc->max_levels) {
2756     PetscInt im_active,active_procs;
2757     /* count "active processes" */
2758     im_active = !!(pcis->n);
2759     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2760     if (active_procs/pcbddc->coarsening_ratio < 2) {
2761       if (pcbddc->dbg_flag) {
2762         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2763        ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2764       }
2765     } else {
2766       use_multilevel = PETSC_TRUE;
2767     }
2768   } else {
2769     if (pcbddc->dbg_flag) {
2770       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
2771       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2772     }
2773   }
2774 
2775   /* creates MATIS object for coarse matrix */
2776   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2777   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2778   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2779   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2780   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2781   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783 
2784   /* Convert or subassemble MATIS depending on chosen coarse solver approach */
2785   if (use_multilevel) {
2786     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2787     /* just for now */
2788     coarse_pc_type  = PCBDDC;
2789     coarse_ksp_type = KSPRICHARDSON;
2790   } else {
2791     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2792     /* just for now */
2793     coarse_pc_type  = PCREDUNDANT;
2794     coarse_ksp_type = KSPPREONLY;
2795   }
2796 
2797   /* propagate symmetry info to coarse matrix */
2798   issym = PETSC_FALSE;
2799   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2800   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2801 
2802   /* create local to global scatters coarse problem */
2803   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2804   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2805 
2806   /* free memory no longer needed */
2807   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2808   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2809   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2810   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2811 
2812   /* Compute coarse null space */
2813   if (pcbddc->NullSpace) {
2814     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2815   }
2816 
2817   /* KSP for coarse problem */
2818   {
2819     PC           pc_temp;
2820     PetscBool    isbddc,isnn;
2821     PetscInt     max_it;
2822     MatStructure matstruct;
2823 
2824     if (!pcbddc->coarse_ksp) { /* create the coarse KSP once */
2825       ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2826       ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2827       ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2828       ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2829       ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2830       ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2831       ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
2832     }
2833     ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2834     ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2835     /* Allow user's customization */
2836     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,"coarse_");CHKERRQ(ierr);
2837     /* Propagate BDDC info to next level */
2838     ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2839     ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2840     ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2841     if (CoarseNullSpace) {
2842       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2843     }
2844     ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2845     /* hack! TODO: MOVE KSP init before matrix assembly */
2846     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2847     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2848     ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2849     if (!isbddc && !isnn) {
2850       ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2851       ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2852       ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2853     }
2854     if (!isbddc && CoarseNullSpace) {
2855       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2856     }
2857     ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2858 
2859     ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2860     if (max_it < 3) {
2861       ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2862       if (isbddc) { /* workaround: TODO see if it could be avoided */
2863         ierr = PCBDDCSetUseExactDirichlet(pc_temp,PETSC_FALSE);CHKERRQ(ierr);
2864       }
2865     }
2866   }
2867   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2868 
2869   /* Check coarse problem if requested */
2870   if (pcbddc->dbg_flag) {
2871     KSP       check_ksp;
2872     KSPType   check_ksp_type;
2873     PC        check_pc;
2874     Vec       check_vec;
2875     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
2876     PetscInt  its;
2877     PetscBool ispreonly;
2878 
2879     /* Create ksp object suitable for estimation of extreme eigenvalues */
2880     ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&check_ksp);CHKERRQ(ierr);
2881     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2882     ierr = KSPSetTolerances(check_ksp,1.e-8,1.e-12,1000,coarse_size);CHKERRQ(ierr);
2883     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2884     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2885     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
2886     if (ispreonly) {
2887       check_ksp_type = KSPPREONLY;
2888     } else {
2889       if (issym) check_ksp_type = KSPCG;
2890       else check_ksp_type = KSPGMRES;
2891       ierr = KSPSetComputeSingularValues(check_ksp,PETSC_TRUE);CHKERRQ(ierr);
2892     }
2893     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2894     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2895     /* create random vec */
2896     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2897     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2898     if (CoarseNullSpace) {
2899       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2900     }
2901     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2902     /* solve coarse problem */
2903     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2904     if (CoarseNullSpace) {
2905       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2906     }
2907     /* check coarse problem residual error */
2908     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2909     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2910     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2911     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2912     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2913     /* get eigenvalue estimation if inexact */
2914     if (!ispreonly) {
2915       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
2916       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
2917       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): % 1.14e %1.14e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
2918     }
2919     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.14e\n",infty_error);CHKERRQ(ierr);
2920     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.14e\n",abs_infty_error);CHKERRQ(ierr);
2921     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2922     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
2923   }
2924   /* free memory */
2925   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
2926   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
2932 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
2933 {
2934   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2935   PC_IS*         pcis = (PC_IS*)pc->data;
2936   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
2937   PetscInt       i,j,coarse_size;
2938   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   /* get indices in local ordering for vertices and constraints */
2943   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
2944   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
2945   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
2946   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2947   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
2948   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
2949   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
2950 
2951   /* Compute global number of coarse dofs */
2952   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2953 
2954   /* check numbering */
2955   if (pcbddc->dbg_flag) {
2956     PetscScalar coarsesum,*array;
2957 
2958     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2959     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
2960     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
2961     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
2962     for (i=0;i<pcbddc->local_primal_size;i++) {
2963       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
2964     }
2965     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
2966     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
2967     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2968     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2969     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2970     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2971     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2972     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2973     for (i=0;i<pcis->n;i++) {
2974       if (array[i] == 1.0) {
2975         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
2976       }
2977     }
2978     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2979     for (i=0;i<pcis->n;i++) {
2980       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
2981     }
2982     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
2983     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
2984     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2985     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2986     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
2987     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
2988     if (pcbddc->dbg_flag > 1) {
2989       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
2990       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2991       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
2992       for (i=0;i<pcbddc->local_primal_size;i++) {
2993         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
2994       }
2995       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2996     }
2997     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2998   }
2999   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3000   /* get back data */
3001   *coarse_size_n = coarse_size;
3002   *local_primal_indices_n = local_primal_indices;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006