xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 85c4d30342a77aaa058e80c0f21364f302bd83b5)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Allocate needed vectors */
18   ierr = PCBDDCCreateWorkVectors(pc);CHKERRQ(ierr);
19 
20   /* Setup local scatters R_to_B and (optionally) R_to_D : PCBDDCCreateWorkVectors should be called first! */
21   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
22 
23   /* Setup local solvers ksp_D and ksp_R */
24   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
25 
26   /* Change global null space passed in by the user if change of basis has been requested */
27   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
28     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
29   }
30 
31   /*
32      Setup local correction and local part of coarse basis.
33      Gives back the dense local part of the coarse matrix in column major ordering
34   */
35   ierr = PCBDDCSetUpCoarseLocal(pc,&coarse_submat_vals);CHKERRQ(ierr);
36 
37   /* Compute total number of coarse nodes and setup coarse solver */
38   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
39 
40   /* free */
41   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
42   PetscFunctionReturn(0);
43 }
44 
45 #undef __FUNCT__
46 #define __FUNCT__ "PCBDDCResetCustomization"
47 PetscErrorCode PCBDDCResetCustomization(PC pc)
48 {
49   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
50   PetscInt       i;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
56   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
58   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
59   for (i=0;i<pcbddc->n_ISForDofs;i++) {
60     ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
61   }
62   ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = VecDestroy(&pcbddc->vec4_D);CHKERRQ(ierr);
101   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
104   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 #undef __FUNCT__
109 #define __FUNCT__ "PCBDDCCreateWorkVectors"
110 PetscErrorCode PCBDDCCreateWorkVectors(PC pc)
111 {
112   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
113   PC_IS          *pcis = (PC_IS*)pc->data;
114   VecType        impVecType;
115   PetscInt       n_vertices,n_constraints,local_primal_size,n_R;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,NULL);CHKERRQ(ierr);
120   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&n_constraints,NULL);CHKERRQ(ierr);
121   local_primal_size = n_constraints+n_vertices;
122   n_R = pcis->n-n_vertices;
123   /* local work vectors */
124   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
125   ierr = VecDuplicate(pcis->vec1_D,&pcbddc->vec4_D);CHKERRQ(ierr);
126   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
127   ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
128   ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
129   ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
130   ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
131   ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,local_primal_size);CHKERRQ(ierr);
132   ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
133   if (n_constraints) {
134     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
135     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
136     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "PCBDDCSetUpCoarseLocal"
143 PetscErrorCode PCBDDCSetUpCoarseLocal(PC pc, PetscScalar **coarse_submat_vals_n)
144 {
145   PetscErrorCode         ierr;
146   /* pointers to pcis and pcbddc */
147   PC_IS*                 pcis = (PC_IS*)pc->data;
148   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
149   /* submatrices of local problem */
150   Mat                    A_RV,A_VR,A_VV;
151   /* working matrices */
152   Mat                    M1,M2,M3,C_CR;
153   /* working vectors */
154   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
155   /* additional working stuff */
156   IS                     is_aux;
157   ISLocalToGlobalMapping BtoNmap;
158   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
159   const PetscScalar      *array,*row_cmat_values;
160   const PetscInt         *row_cmat_indices,*idx_R_local;
161   PetscInt               *vertices,*idx_V_B,*auxindices;
162   PetscInt               n_vertices,n_constraints,size_of_constraint;
163   PetscInt               i,j,n_R,n_D,n_B;
164   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
165   /* Vector and matrix types */
166   VecType                impVecType;
167   MatType                impMatType;
168   /* some shortcuts to scalars */
169   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
170   /* for debugging purposes */
171   PetscReal              *coarsefunctions_errors,*constraints_errors;
172 
173   PetscFunctionBegin;
174   /* get number of vertices and their local indices */
175   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
176   n_constraints = pcbddc->local_primal_size-n_vertices;
177   /* Set Non-overlapping dimensions */
178   n_B = pcis->n_B; n_D = pcis->n - n_B;
179   n_R = pcis->n-n_vertices;
180 
181   /* Set types for local objects needed by BDDC precondtioner */
182   impMatType = MATSEQDENSE;
183   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
184 
185   /* Allocating some extra storage just to be safe */
186   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
187   for (i=0;i<pcis->n;i++) auxindices[i]=i;
188 
189   /* vertices in boundary numbering */
190   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
191   ierr = ISLocalToGlobalMappingCreateIS(pcis->is_B_local,&BtoNmap);CHKERRQ(ierr);
192   ierr = ISGlobalToLocalMappingApply(BtoNmap,IS_GTOLM_DROP,n_vertices,vertices,&i,idx_V_B);CHKERRQ(ierr);
193   ierr = ISLocalToGlobalMappingDestroy(&BtoNmap);CHKERRQ(ierr);
194   if (i != n_vertices) {
195     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
196   }
197 
198   /* some work vectors on vertices and/or constraints */
199   if (n_vertices) {
200     ierr = VecCreate(PETSC_COMM_SELF,&vec1_V);CHKERRQ(ierr);
201     ierr = VecSetSizes(vec1_V,n_vertices,n_vertices);CHKERRQ(ierr);
202     ierr = VecSetType(vec1_V,impVecType);CHKERRQ(ierr);
203     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
204   }
205   if (n_constraints) {
206     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
207     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
208   }
209 
210   /* Precompute stuffs needed for preprocessing and application of BDDC*/
211   if (n_constraints) {
212     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
213     ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
214     ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
215     ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
216 
217     /* Extract constraints on R nodes: C_{CR}  */
218     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
219     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
220     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
221 
222     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
223     for (i=0;i<n_constraints;i++) {
224       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
225       /* Get row of constraint matrix in R numbering */
226       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
227       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
228       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
229       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
230       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
231       /* Solve for row of constraint matrix in R numbering */
232       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
233       /* Set values in local_auxmat2 */
234       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
235       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
236       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
237     }
238     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
239     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
240     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
241 
242     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
243     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
244     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
245     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
246     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
247     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
248     ierr = MatSetUp(M1);CHKERRQ(ierr);
249     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
250     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
251     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
252     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
253     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
254     ierr = MatDestroy(&M2);CHKERRQ(ierr);
255     ierr = MatDestroy(&M3);CHKERRQ(ierr);
256     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
257     ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
258   }
259 
260   /* Get submatrices from subdomain matrix */
261   if (n_vertices) {
262     ierr = ISCreateGeneral(PETSC_COMM_SELF,n_vertices,vertices,PETSC_COPY_VALUES,&is_aux);CHKERRQ(ierr);
263     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
264     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
265     ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
266     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
267   }
268 
269   /* Matrix of coarse basis functions (local) */
270   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
271   ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
272   ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
273   ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
274   if (pcbddc->switch_static || pcbddc->dbg_flag) {
275     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
276     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
277     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
278     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
279   }
280 
281   if (pcbddc->dbg_flag) {
282     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
283     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
284     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
285   }
286   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
287   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
288 
289   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
290 
291   /* vertices */
292   for (i=0;i<n_vertices;i++) {
293     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
294     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
295     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
296     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
297     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
298     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
299     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
300     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
301     if (n_constraints) {
302       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
303       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
304       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
305     }
306     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
307     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
308 
309     /* Set values in coarse basis function and subdomain part of coarse_mat */
310     /* coarse basis functions */
311     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
312     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
313     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
314     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
315     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
316     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
317     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
318     if (pcbddc->switch_static || pcbddc->dbg_flag) {
319       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
320       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
321       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
322       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
323       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
324     }
325     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
326     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
327     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
328     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
329     if (n_constraints) {
330       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
331       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
332       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
333     }
334 
335     /* check */
336     if (pcbddc->dbg_flag) {
337       /* assemble subdomain vector on local nodes */
338       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
339       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
340       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
341       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
342       ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
343       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
344       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
345       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
346       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
347       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
348       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
349       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
350       if (n_constraints) {
351         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
352         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
353         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
354       }
355       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
356       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
357       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
358       /* check saddle point solution */
359       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
360       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
361       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
362       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
363       /* shift by the identity matrix */
364       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
365       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
366       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
367       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
368     }
369   }
370 
371   /* constraints */
372   for (i=0;i<n_constraints;i++) {
373     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
374     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
375     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
376     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
377     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
378     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
379     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
381     if (n_vertices) {
382       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
383     }
384     /* Set values in coarse basis function and subdomain part of coarse_mat */
385     /* coarse basis functions */
386     j = i+n_vertices; /* don't touch this! */
387     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
388     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
389     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
390     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
391     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
392     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
393     if (pcbddc->switch_static || pcbddc->dbg_flag) {
394       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
396       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
397       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
398       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
399     }
400     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
401     if (n_vertices) {
402       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
403       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
405     }
406     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
407     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
408     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
409 
410     if (pcbddc->dbg_flag) {
411       /* assemble subdomain vector on nodes */
412       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
413       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
414       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
415       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
416       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
417       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
418       /* assemble subdomain vector of lagrange multipliers */
419       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
420       if (n_vertices) {
421         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
422         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
423         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
424       }
425       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
426       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
427       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
428       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
429       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
430       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
431       /* check saddle point solution */
432       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
433       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
434       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
435       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
436       /* shift by the identity matrix */
437       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
438       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
439       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
440       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
441     }
442   }
443   /* call assembling routines for local coarse basis */
444   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
445   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
446   if (pcbddc->switch_static || pcbddc->dbg_flag) {
447     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
448     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
449   }
450 
451   /* compute other basis functions for non-symmetric problems */
452   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
453   if (!setsym || (setsym && !issym)) {
454     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
455     ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
456     ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
457     ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
458     if (pcbddc->switch_static || pcbddc->dbg_flag) {
459       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
460       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
461       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
462       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
463     }
464     for (i=0;i<pcbddc->local_primal_size;i++) {
465       if (n_constraints) {
466         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
467         for (j=0;j<n_constraints;j++) {
468           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
469         }
470         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
471         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
472       }
473       if (i<n_vertices) {
474         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
475         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
476         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
477         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
478         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
479         if (n_constraints) {
480           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
481         }
482       } else {
483         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
484       }
485       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
486       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
487       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
488       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
489       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
490       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
491       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
492       if (i<n_vertices) {
493         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
494       }
495       if (pcbddc->switch_static || pcbddc->dbg_flag) {
496         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
497         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
498         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
499         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
500         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
501       }
502 
503       if (pcbddc->dbg_flag) {
504         /* assemble subdomain vector on nodes */
505         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
506         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
507         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
508         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
509         if (i<n_vertices) {
510           ierr = VecSetValue(pcis->vec1_N,vertices[i],one,INSERT_VALUES);CHKERRQ(ierr);
511         }
512         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
513         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
514         /* assemble subdomain vector of lagrange multipliers */
515         for (j=0;j<pcbddc->local_primal_size;j++) {
516           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
517         }
518         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520         /* check saddle point solution */
521         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
522         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
523         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
524         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
525         /* shift by the identity matrix */
526         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
527         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
528         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
529         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
530       }
531     }
532     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
533     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
534     if (pcbddc->switch_static || pcbddc->dbg_flag) {
535       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
536       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
537     }
538   }
539   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
540   /* Checking coarse_sub_mat and coarse basis functios */
541   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
542   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
543   if (pcbddc->dbg_flag) {
544     Mat         coarse_sub_mat;
545     Mat         AUXMAT,TM1,TM2,TM3,TM4;
546     Mat         coarse_phi_D,coarse_phi_B;
547     Mat         coarse_psi_D,coarse_psi_B;
548     Mat         A_II,A_BB,A_IB,A_BI;
549     MatType     checkmattype=MATSEQAIJ;
550     PetscReal   real_value;
551 
552     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
553     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
554     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
555     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
556     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
557     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
558     if (pcbddc->coarse_psi_B) {
559       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
560       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
561     }
562     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
563 
564     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
565     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
566     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
567     if (pcbddc->coarse_psi_B) {
568       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
569       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
570       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
571       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
572       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
573       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
574       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
575       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
576       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
577       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
578       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
579       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
580     } else {
581       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
582       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
583       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
584       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
585       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
586       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
587       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
588       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
589     }
590     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
591     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
592     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
593     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
594     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
595     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
596     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
597     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
598     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
599     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
600     for (i=0;i<pcbddc->local_primal_size;i++) {
601       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
602     }
603     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
604     for (i=0;i<pcbddc->local_primal_size;i++) {
605       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
606     }
607     if (pcbddc->coarse_psi_B) {
608       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
609       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
610         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
611       }
612       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
613       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
614         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
615       }
616     }
617     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
618     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
619     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
620     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
621     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
622     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
623     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
624     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
625     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
626     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
627     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
628     if (pcbddc->coarse_psi_B) {
629       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
630       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
631     }
632     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
633     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
634     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
635     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
636   }
637   /* free memory */
638   if (n_vertices) {
639     ierr = PetscFree(vertices);CHKERRQ(ierr);
640     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
641     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
642     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
643     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
644     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
645   }
646   if (n_constraints) {
647     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
648     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
649     ierr = MatDestroy(&M1);CHKERRQ(ierr);
650     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
651   }
652   ierr = PetscFree(auxindices);CHKERRQ(ierr);
653   /* get back data */
654   *coarse_submat_vals_n = coarse_submat_vals;
655   PetscFunctionReturn(0);
656 }
657 
658 #undef __FUNCT__
659 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
660 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
661 {
662   PC_IS*            pcis = (PC_IS*)(pc->data);
663   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
664   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
665   /* manage repeated solves */
666   MatReuse          reuse;
667   MatStructure      matstruct;
668   PetscErrorCode    ierr;
669 
670   PetscFunctionBegin;
671   /* get mat flags */
672   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
673   reuse = MAT_INITIAL_MATRIX;
674   if (pc->setupcalled) {
675     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
676     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
677     if (matstruct == SAME_NONZERO_PATTERN) {
678       reuse = MAT_REUSE_MATRIX;
679     } else {
680       reuse = MAT_INITIAL_MATRIX;
681     }
682   }
683   if (reuse == MAT_INITIAL_MATRIX) {
684     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
685     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
686     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
687     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
688     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
689   }
690 
691   /* transform local matrices if needed */
692   if (pcbddc->use_change_of_basis) {
693     Mat         change_mat_all;
694     PetscScalar *row_cmat_values;
695     PetscInt    *row_cmat_indices;
696     PetscInt    *nnz,*is_indices,*temp_indices;
697     PetscInt    i,j,k,n_D,n_B;
698 
699     /* Get Non-overlapping dimensions */
700     n_B = pcis->n_B;
701     n_D = pcis->n-n_B;
702 
703     /* compute nonzero structure of change of basis on all local nodes */
704     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
705     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
706     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
707     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
708     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
709     k=1;
710     for (i=0;i<n_B;i++) {
711       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
712       nnz[is_indices[i]]=j;
713       if (k < j) k = j;
714       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
715     }
716     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
717     /* assemble change of basis matrix on the whole set of local dofs */
718     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
719     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
720     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
721     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
722     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
723     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
724     for (i=0;i<n_D;i++) {
725       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
726     }
727     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
728     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
729     for (i=0;i<n_B;i++) {
730       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
731       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
732       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
733       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
734     }
735     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
736     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
737     /* TODO: HOW TO WORK WITH BAIJ? PtAP not provided */
738     ierr = MatGetBlockSize(matis->A,&i);CHKERRQ(ierr);
739     if (i==1) {
740       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
741     } else {
742       Mat work_mat;
743       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
744       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
745       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
746     }
747     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
748     ierr = PetscFree(nnz);CHKERRQ(ierr);
749     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
750   } else {
751     /* without change of basis, the local matrix is unchanged */
752     if (!pcbddc->local_mat) {
753       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
754       pcbddc->local_mat = matis->A;
755     }
756   }
757 
758   /* get submatrices */
759   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
760   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
761   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
762   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
766 #undef __FUNCT__
767 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
769 {
770   PC_IS*         pcis = (PC_IS*)(pc->data);
771   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
772   IS             is_aux1,is_aux2;
773   PetscInt       *vertices,*aux_array1,*aux_array2,*is_indices,*idx_R_local;
774   PetscInt       n_vertices,n_constraints,i,j,n_R,n_D,n_B;
775   PetscBT        bitmask;
776   PetscErrorCode ierr;
777 
778   PetscFunctionBegin;
779   /* Set Non-overlapping dimensions */
780   n_B = pcis->n_B; n_D = pcis->n - n_B;
781   /* get vertex indices from constraint matrix */
782   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&n_vertices,&vertices);CHKERRQ(ierr);
783   /* Set number of constraints */
784   n_constraints = pcbddc->local_primal_size-n_vertices;
785   /* create auxiliary bitmask */
786   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
787   for (i=0;i<n_vertices;i++) {
788     ierr = PetscBTSet(bitmask,vertices[i]);CHKERRQ(ierr);
789   }
790   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
791   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
792   for (i=0, n_R=0; i<pcis->n; i++) {
793     if (!PetscBTLookup(bitmask,i)) {
794       idx_R_local[n_R] = i;
795       n_R++;
796     }
797   }
798   ierr = PetscFree(vertices);CHKERRQ(ierr);
799   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_R,idx_R_local,PETSC_OWN_POINTER,&pcbddc->is_R_local);CHKERRQ(ierr);
800 
801   /* print some info if requested */
802   if (pcbddc->dbg_flag) {
803     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
804     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
805     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
806     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
807     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,n_constraints,pcbddc->local_primal_size);CHKERRQ(ierr);
808     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
809     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
810   }
811 
812   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
813   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
814   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
815   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816   for (i=0; i<n_D; i++) {
817     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
818   }
819   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
820   for (i=0, j=0; i<n_R; i++) {
821     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
822       aux_array1[j++] = i;
823     }
824   }
825   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
826   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
827   for (i=0, j=0; i<n_B; i++) {
828     if (!PetscBTLookup(bitmask,is_indices[i])) {
829       aux_array2[j++] = i;
830     }
831   }
832   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
833   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
834   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
835   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
836   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
837 
838   if (pcbddc->switch_static || pcbddc->dbg_flag) {
839     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
840     for (i=0, j=0; i<n_R; i++) {
841       if (PetscBTLookup(bitmask,idx_R_local[i])) {
842         aux_array1[j++] = i;
843       }
844     }
845     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
846     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
847     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
848   }
849   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
856 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
857 {
858   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
859   PC_IS          *pcis = (PC_IS*)pc->data;
860   PC             pc_temp;
861   Mat            A_RR;
862   Vec            vec1,vec2,vec3;
863   MatStructure   matstruct;
864   PetscScalar    m_one = -1.0;
865   PetscReal      value;
866   PetscInt       n_D,n_R;
867   PetscBool      use_exact,use_exact_reduced;
868   PetscErrorCode ierr;
869   /* prefixes stuff */
870   char           dir_prefix[256],neu_prefix[256],str_level[3];
871   size_t         len;
872 
873   PetscFunctionBegin;
874   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
875 
876   /* compute prefixes */
877   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
878   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
879   if (!pcbddc->current_level) {
880     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
881     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
882     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
883     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
884   } else {
885     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
886     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
887     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
888     len -= 15; /* remove "pc_bddc_coarse_" */
889     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
890     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
891     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
892     *(dir_prefix+len)='\0';
893     *(neu_prefix+len)='\0';
894     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
895     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
896     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
897     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
898   }
899 
900   /* DIRICHLET PROBLEM */
901   /* Matrix for Dirichlet problem is pcis->A_II */
902   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
903   if (!pcbddc->ksp_D) { /* create object if not yet build */
904     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
905     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
906     /* default */
907     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
908     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
909     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
910     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
911     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
912   }
913   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
914   /* Allow user's customization */
915   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
916   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
917   if (!n_D) {
918     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
919     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
920   }
921   /* Set Up KSP for Dirichlet problem of BDDC */
922   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
923   /* set ksp_D into pcis data */
924   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
925   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
926   pcis->ksp_D = pcbddc->ksp_D;
927 
928   /* NEUMANN PROBLEM */
929   /* Matrix for Neumann problem is A_RR -> we need to create it */
930   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
931   ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr);
932   if (!pcbddc->ksp_R) { /* create object if not yet build */
933     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
934     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
935     /* default */
936     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
937     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
938     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
939     ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
940     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
941   }
942   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
943   /* Allow user's customization */
944   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
945   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
946   if (!n_R) {
947     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
948     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
949   }
950   /* Set Up KSP for Neumann problem of BDDC */
951   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
952 
953   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
954 
955   /* Dirichlet */
956   ierr = MatGetVecs(pcis->A_II,&vec1,&vec2);CHKERRQ(ierr);
957   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
958   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
959   ierr = MatMult(pcis->A_II,vec1,vec2);CHKERRQ(ierr);
960   ierr = KSPSolve(pcbddc->ksp_D,vec2,vec3);CHKERRQ(ierr);
961   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
962   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
963   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
964   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
965   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
966   /* need to be adapted? */
967   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
968   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
969   ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
970   /* print info */
971   if (pcbddc->dbg_flag) {
972     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
973     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
974     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
975     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
976     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
977   }
978   if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
979     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
980   }
981 
982   /* Neumann */
983   ierr = MatGetVecs(A_RR,&vec1,&vec2);CHKERRQ(ierr);
984   ierr = VecDuplicate(vec1,&vec3);CHKERRQ(ierr);
985   ierr = VecSetRandom(vec1,NULL);CHKERRQ(ierr);
986   ierr = MatMult(A_RR,vec1,vec2);CHKERRQ(ierr);
987   ierr = KSPSolve(pcbddc->ksp_R,vec2,vec3);CHKERRQ(ierr);
988   ierr = VecAXPY(vec3,m_one,vec1);CHKERRQ(ierr);
989   ierr = VecNorm(vec3,NORM_INFINITY,&value);CHKERRQ(ierr);
990   ierr = VecDestroy(&vec1);CHKERRQ(ierr);
991   ierr = VecDestroy(&vec2);CHKERRQ(ierr);
992   ierr = VecDestroy(&vec3);CHKERRQ(ierr);
993   /* need to be adapted? */
994   use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
995   ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
996   /* print info */
997   if (pcbddc->dbg_flag) {
998     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
999     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1000   }
1001   if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1002     ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1003   }
1004   /* free Neumann problem's matrix */
1005   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1011 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1012 {
1013   PetscErrorCode ierr;
1014   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1015 
1016   PetscFunctionBegin;
1017   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1018   if (pcbddc->local_auxmat1) {
1019     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1020     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1027 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1028 {
1029   PetscErrorCode ierr;
1030   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1031   PC_IS*            pcis = (PC_IS*)  (pc->data);
1032   const PetscScalar zero = 0.0;
1033 
1034   PetscFunctionBegin;
1035   /* Application of PHI^T (or PSI^T)  */
1036   if (pcbddc->coarse_psi_B) {
1037     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1038     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1039   } else {
1040     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1041     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1042   }
1043   /* Scatter data of coarse_rhs */
1044   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1045   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046 
1047   /* Local solution on R nodes */
1048   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1049   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1050   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1051   if (pcbddc->switch_static) {
1052     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1053     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054   }
1055   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1056   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1057   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1059   if (pcbddc->switch_static) {
1060     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1061     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062   }
1063 
1064   /* Coarse solution */
1065   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1067     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1068   }
1069   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1070   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1071 
1072   /* Sum contributions from two levels */
1073   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1074   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1080 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1081 {
1082   PetscErrorCode ierr;
1083   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1084 
1085   PetscFunctionBegin;
1086   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1092 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1093 {
1094   PetscErrorCode ierr;
1095   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1096 
1097   PetscFunctionBegin;
1098   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /* uncomment for testing purposes */
1103 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1104 #undef __FUNCT__
1105 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1106 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1107 {
1108   PetscErrorCode    ierr;
1109   PC_IS*            pcis = (PC_IS*)(pc->data);
1110   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1111   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1112   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1113   MatType           impMatType=MATSEQAIJ;
1114   /* one and zero */
1115   PetscScalar       one=1.0,zero=0.0;
1116   /* space to store constraints and their local indices */
1117   PetscScalar       *temp_quadrature_constraint;
1118   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1119   /* iterators */
1120   PetscInt          i,j,k,total_counts,temp_start_ptr;
1121   /* stuff to store connected components stored in pcbddc->mat_graph */
1122   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1123   PetscInt          n_ISForFaces,n_ISForEdges;
1124   /* near null space stuff */
1125   MatNullSpace      nearnullsp;
1126   const Vec         *nearnullvecs;
1127   Vec               *localnearnullsp;
1128   PetscBool         nnsp_has_cnst;
1129   PetscInt          nnsp_size;
1130   PetscScalar       *array;
1131   /* BLAS integers */
1132   PetscBLASInt      lwork,lierr;
1133   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1134   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1135   /* LAPACK working arrays for SVD or POD */
1136   PetscBool         skip_lapack;
1137   PetscScalar       *work;
1138   PetscReal         *singular_vals;
1139 #if defined(PETSC_USE_COMPLEX)
1140   PetscReal         *rwork;
1141 #endif
1142 #if defined(PETSC_MISSING_LAPACK_GESVD)
1143   PetscBLASInt      Blas_one_2=1;
1144   PetscScalar       *temp_basis,*correlation_mat;
1145 #else
1146   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1147   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1148 #endif
1149   /* change of basis */
1150   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1151   PetscBool         boolforchange;
1152   PetscBT           touched,change_basis;
1153   /* auxiliary stuff */
1154   PetscInt          *nnz,*is_indices,*local_to_B;
1155   /* some quantities */
1156   PetscInt          n_vertices,total_primal_vertices;
1157   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1158 
1159 
1160   PetscFunctionBegin;
1161   /* Get index sets for faces, edges and vertices from graph */
1162   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1163     pcbddc->use_vertices = PETSC_TRUE;
1164   }
1165   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1166   /* HACK: provide functions to set change of basis */
1167   if (!ISForVertices && pcbddc->NullSpace) {
1168     pcbddc->use_change_of_basis = PETSC_TRUE;
1169     pcbddc->use_change_on_faces = PETSC_FALSE;
1170   }
1171   /* print some info */
1172   if (pcbddc->dbg_flag) {
1173     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1174     i = 0;
1175     if (ISForVertices) {
1176       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1177     }
1178     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1179     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1180     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1181     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1182   }
1183   /* check if near null space is attached to global mat */
1184   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1185   if (nearnullsp) {
1186     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1187   } else { /* if near null space is not provided BDDC uses constants by default */
1188     nnsp_size = 0;
1189     nnsp_has_cnst = PETSC_TRUE;
1190   }
1191   /* get max number of constraints on a single cc */
1192   max_constraints = nnsp_size;
1193   if (nnsp_has_cnst) max_constraints++;
1194 
1195   /*
1196        Evaluate maximum storage size needed by the procedure
1197        - temp_indices will contain start index of each constraint stored as follows
1198        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1199        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1200        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1201                                                                                                                                                          */
1202   total_counts = n_ISForFaces+n_ISForEdges;
1203   total_counts *= max_constraints;
1204   n_vertices = 0;
1205   if (ISForVertices) {
1206     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1207   }
1208   total_counts += n_vertices;
1209   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1210   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1211   total_counts = 0;
1212   max_size_of_constraint = 0;
1213   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1214     if (i<n_ISForEdges) {
1215       used_IS = &ISForEdges[i];
1216     } else {
1217       used_IS = &ISForFaces[i-n_ISForEdges];
1218     }
1219     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1220     total_counts += j;
1221     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1222   }
1223   total_counts *= max_constraints;
1224   total_counts += n_vertices;
1225   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1226   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1227   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1228   /* local to boundary numbering */
1229   ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&local_to_B);CHKERRQ(ierr);
1230   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1231   for (i=0;i<pcis->n;i++) local_to_B[i]=-1;
1232   for (i=0;i<pcis->n_B;i++) local_to_B[is_indices[i]]=i;
1233   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1234   /* get local part of global near null space vectors */
1235   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1236   for (k=0;k<nnsp_size;k++) {
1237     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1238     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240   }
1241 
1242   /* whether or not to skip lapack calls */
1243   skip_lapack = PETSC_TRUE;
1244   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1245 
1246   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1247   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1248     PetscScalar temp_work;
1249 #if defined(PETSC_MISSING_LAPACK_GESVD)
1250     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1251     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1252     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1253     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1254 #if defined(PETSC_USE_COMPLEX)
1255     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1256 #endif
1257     /* now we evaluate the optimal workspace using query with lwork=-1 */
1258     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1259     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1260     lwork = -1;
1261     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1262 #if !defined(PETSC_USE_COMPLEX)
1263     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1264 #else
1265     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1266 #endif
1267     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1268     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1269 #else /* on missing GESVD */
1270     /* SVD */
1271     PetscInt max_n,min_n;
1272     max_n = max_size_of_constraint;
1273     min_n = max_constraints;
1274     if (max_size_of_constraint < max_constraints) {
1275       min_n = max_size_of_constraint;
1276       max_n = max_constraints;
1277     }
1278     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1279 #if defined(PETSC_USE_COMPLEX)
1280     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1281 #endif
1282     /* now we evaluate the optimal workspace using query with lwork=-1 */
1283     lwork = -1;
1284     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1285     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1286     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1287     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1288 #if !defined(PETSC_USE_COMPLEX)
1289     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1290 #else
1291     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1292 #endif
1293     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1294     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1295 #endif /* on missing GESVD */
1296     /* Allocate optimal workspace */
1297     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1298     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1299   }
1300   /* Now we can loop on constraining sets */
1301   total_counts = 0;
1302   temp_indices[0] = 0;
1303   /* vertices */
1304   if (ISForVertices) {
1305     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1306     if (nnsp_has_cnst) { /* consider all vertices */
1307       for (i=0;i<n_vertices;i++) {
1308         temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1309         temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1310         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1311         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1312         total_counts++;
1313       }
1314     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1315       PetscBool used_vertex;
1316       for (i=0;i<n_vertices;i++) {
1317         used_vertex = PETSC_FALSE;
1318         k = 0;
1319         while (!used_vertex && k<nnsp_size) {
1320           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1321           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1322             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1323             temp_indices_to_constraint_B[temp_indices[total_counts]]=local_to_B[is_indices[i]];
1324             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1325             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1326             total_counts++;
1327             used_vertex = PETSC_TRUE;
1328           }
1329           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1330           k++;
1331         }
1332       }
1333     }
1334     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1335     n_vertices = total_counts;
1336   }
1337 
1338   /* edges and faces */
1339   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1340     if (i<n_ISForEdges) {
1341       used_IS = &ISForEdges[i];
1342       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1343     } else {
1344       used_IS = &ISForFaces[i-n_ISForEdges];
1345       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1346     }
1347     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1348     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1349     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1350     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1351     /* change of basis should not be performed on local periodic nodes */
1352     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1353     if (nnsp_has_cnst) {
1354       PetscScalar quad_value;
1355       temp_constraints++;
1356       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1357       for (j=0;j<size_of_constraint;j++) {
1358         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1359         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1360         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1361       }
1362       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1363       if (boolforchange) {
1364         ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1365       }
1366       total_counts++;
1367     }
1368     for (k=0;k<nnsp_size;k++) {
1369       PetscReal real_value;
1370       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1371       for (j=0;j<size_of_constraint;j++) {
1372         temp_indices_to_constraint[temp_indices[total_counts]+j]=is_indices[j];
1373         temp_indices_to_constraint_B[temp_indices[total_counts]+j]=local_to_B[is_indices[j]];
1374         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1375       }
1376       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1377       /* check if array is null on the connected component */
1378       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1379       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1380       if (real_value > 0.0) { /* keep indices and values */
1381         temp_constraints++;
1382         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1383         if (boolforchange) {
1384           ierr = PetscBTSet(change_basis,total_counts);CHKERRQ(ierr);
1385         }
1386         total_counts++;
1387       }
1388     }
1389     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1390     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user */
1391     if (!pcbddc->use_nnsp_true) {
1392       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1393 
1394 #if defined(PETSC_MISSING_LAPACK_GESVD)
1395       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1396          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1397          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1398             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1399             from that computed using LAPACKgesvd
1400          -> This is due to a different computation of eigenvectors in LAPACKheev
1401          -> The quality of the POD-computed basis will be the same */
1402       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1403       /* Store upper triangular part of correlation matrix */
1404       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1405       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1406       for (j=0;j<temp_constraints;j++) {
1407         for (k=0;k<j+1;k++) {
1408           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1409         }
1410       }
1411       /* compute eigenvalues and eigenvectors of correlation matrix */
1412       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1413       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1414 #if !defined(PETSC_USE_COMPLEX)
1415       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1416 #else
1417       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1418 #endif
1419       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1420       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1421       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1422       j=0;
1423       while (j < temp_constraints && singular_vals[j] < tol) j++;
1424       total_counts=total_counts-j;
1425       /* scale and copy POD basis into used quadrature memory */
1426       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1427       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1428       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1429       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1430       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1431       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1432       if (j<temp_constraints) {
1433         PetscInt ii;
1434         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1435         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1436         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1437         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1438         for (k=0;k<temp_constraints-j;k++) {
1439           for (ii=0;ii<size_of_constraint;ii++) {
1440             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1441           }
1442         }
1443       }
1444 #else  /* on missing GESVD */
1445       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1446       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1447       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1448       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1449 #if !defined(PETSC_USE_COMPLEX)
1450       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1451 #else
1452       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1453 #endif
1454       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1455       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1456       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1457       k = temp_constraints;
1458       if (k > size_of_constraint) k = size_of_constraint;
1459       j = 0;
1460       while (j < k && singular_vals[k-j-1] < tol) j++;
1461       total_counts = total_counts-temp_constraints+k-j;
1462 #endif /* on missing GESVD */
1463     }
1464   }
1465   /* free index sets of faces, edges and vertices */
1466   for (i=0;i<n_ISForFaces;i++) {
1467     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1470   for (i=0;i<n_ISForEdges;i++) {
1471     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1472   }
1473   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1474   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1475 
1476   /* free workspace */
1477   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1478     ierr = PetscFree(work);CHKERRQ(ierr);
1479 #if defined(PETSC_USE_COMPLEX)
1480     ierr = PetscFree(rwork);CHKERRQ(ierr);
1481 #endif
1482     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1483 #if defined(PETSC_MISSING_LAPACK_GESVD)
1484     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1485     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1486 #endif
1487   }
1488   for (k=0;k<nnsp_size;k++) {
1489     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1490   }
1491   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1492 
1493   /* set quantities in pcbddc data structure */
1494   /* n_vertices defines the number of subdomain corners in the primal space */
1495   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1496   pcbddc->local_primal_size = total_counts;
1497   pcbddc->n_vertices = n_vertices;
1498   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1499 
1500   /* Create constraint matrix */
1501   /* The constraint matrix is used to compute the l2g map of primal dofs */
1502   /* so we need to set it up properly either with or without change of basis */
1503   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1504   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1505   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1506   /* array to compute a local numbering of constraints : vertices first then constraints */
1507   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1508   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1509   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1510   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1511   /* auxiliary stuff for basis change */
1512   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1513   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1514 
1515   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1516   total_primal_vertices=0;
1517   for (i=0;i<pcbddc->local_primal_size;i++) {
1518     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1519     if (size_of_constraint == 1) {
1520       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1521       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1522       aux_primal_minloc[total_primal_vertices]=0;
1523       total_primal_vertices++;
1524     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1525       PetscInt min_loc,min_index;
1526       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1527       /* find first untouched local node */
1528       k = 0;
1529       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1530       min_index = global_indices[k];
1531       min_loc = k;
1532       /* search the minimum among global nodes already untouched on the cc */
1533       for (k=1;k<size_of_constraint;k++) {
1534         /* there can be more than one constraint on a single connected component */
1535         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1536           min_index = global_indices[k];
1537           min_loc = k;
1538         }
1539       }
1540       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1541       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1542       aux_primal_minloc[total_primal_vertices]=min_loc;
1543       total_primal_vertices++;
1544     }
1545   }
1546   /* free workspace */
1547   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1548   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1549   /* permute indices in order to have a sorted set of vertices */
1550   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);
1551 
1552   /* nonzero structure of constraint matrix */
1553   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1554   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1555   j=total_primal_vertices;
1556   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1557     if (!PetscBTLookup(change_basis,i)) {
1558       nnz[j]=temp_indices[i+1]-temp_indices[i];
1559       j++;
1560     }
1561   }
1562   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1563   ierr = PetscFree(nnz);CHKERRQ(ierr);
1564   /* set values in constraint matrix */
1565   for (i=0;i<total_primal_vertices;i++) {
1566     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1567   }
1568   total_counts = total_primal_vertices;
1569   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1570     if (!PetscBTLookup(change_basis,i)) {
1571       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1572       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1573       total_counts++;
1574     }
1575   }
1576   /* assembling */
1577   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1578   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1579   /*
1580   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1581   */
1582   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1583   if (pcbddc->use_change_of_basis) {
1584     PetscBool qr_needed = PETSC_FALSE;
1585     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1586     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1587     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1588     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1589     /* work arrays */
1590     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1591     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1592     /* nonzeros per row */
1593     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1594       if (PetscBTLookup(change_basis,i)) {
1595         qr_needed = PETSC_TRUE;
1596         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1597         for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1598       }
1599     }
1600     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1601     ierr = PetscFree(nnz);CHKERRQ(ierr);
1602     /* Set initial identity in the matrix */
1603     for (i=0;i<pcis->n_B;i++) {
1604       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606 
1607     /* Now we loop on the constraints which need a change of basis */
1608     /* Change of basis matrix is evaluated as the FIRST APPROACH in */
1609     /* Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) */
1610     /* Change of basis matrix T computed via QR decomposition of constraints */
1611     if (qr_needed) {
1612       /* dual and primal dofs on a single cc */
1613       PetscInt     dual_dofs,primal_dofs;
1614       /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1615       PetscInt     primal_counter;
1616       /* working stuff for GEQRF */
1617       PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1618       PetscBLASInt lqr_work;
1619       /* working stuff for UNGQR */
1620       PetscScalar  *gqr_work,lgqr_work_t;
1621       PetscBLASInt lgqr_work;
1622       /* working stuff for TRTRS */
1623       PetscScalar  *trs_rhs;
1624       PetscBLASInt Blas_NRHS;
1625       /* pointers for values insertion into change of basis matrix */
1626       PetscInt     *start_rows,*start_cols;
1627       PetscScalar  *start_vals;
1628       /* working stuff for values insertion */
1629       PetscBT      is_primal;
1630 
1631       /* space to store Q */
1632       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1633       /* first we issue queries for optimal work */
1634       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1635       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1636       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1637       lqr_work = -1;
1638       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1639       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1640       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1641       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1642       lgqr_work = -1;
1643       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1644       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1645       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1646       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1647       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1648       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1649       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1650       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1651       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1652       /* array to store scaling factors for reflectors */
1653       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1654       /* array to store rhs and solution of triangular solver */
1655       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1656       /* array to store whether a node is primal or not */
1657       ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1658       for (i=0;i<total_primal_vertices;i++) {
1659         ierr = PetscBTSet(is_primal,local_to_B[aux_primal_numbering[i]]);CHKERRQ(ierr);
1660       }
1661 
1662       /* allocating workspace for check */
1663       if (pcbddc->dbg_flag) {
1664         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1665         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1666         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1667       }
1668 
1669       /* loop on constraints and see whether or not they need a change of basis */
1670       /* -> using implicit ordering contained in temp_indices data */
1671       total_counts = pcbddc->n_vertices;
1672       primal_counter = total_counts;
1673       while (total_counts<pcbddc->local_primal_size) {
1674         primal_dofs = 1;
1675         if (PetscBTLookup(change_basis,total_counts)) {
1676           /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1677           while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1678             primal_dofs++;
1679           }
1680           /* get constraint info */
1681           size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1682           dual_dofs = size_of_constraint-primal_dofs;
1683 
1684           /* copy quadrature constraints for change of basis check */
1685           if (pcbddc->dbg_flag) {
1686             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d to %d need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs,size_of_constraint);CHKERRQ(ierr);
1687             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1688           }
1689 
1690           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1691           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1692 
1693           /* compute QR decomposition of constraints */
1694           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1695           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1696           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1697           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1698           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1699           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1700           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1701 
1702           /* explictly compute R^-T */
1703           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1704           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1705           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1706           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1707           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1708           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1709           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1710           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1711           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1712           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1713 
1714           /* explcitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1715           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1716           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1717           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1718           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1719           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1720           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1721           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1722           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1723 
1724           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1725              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1726              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1727           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1728           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1729           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1730           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1731           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1732           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1733           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1734           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1735           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1736           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1737 
1738           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1739           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1740           /* insert cols for primal dofs */
1741           for (j=0;j<primal_dofs;j++) {
1742             start_vals = &qr_basis[j*size_of_constraint];
1743             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1744             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1745           }
1746           /* insert cols for dual dofs */
1747           for (j=0,k=0;j<dual_dofs;k++) {
1748             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1749               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1750               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1751               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1752               j++;
1753             }
1754           }
1755 
1756           /* check change of basis */
1757           if (pcbddc->dbg_flag) {
1758             PetscInt   ii,jj;
1759             PetscBool valid_qr=PETSC_TRUE;
1760             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
1761             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1762             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
1763             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1764             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
1765             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
1766             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1767             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
1768             ierr = PetscFPTrapPop();CHKERRQ(ierr);
1769             for (jj=0;jj<size_of_constraint;jj++) {
1770               for (ii=0;ii<primal_dofs;ii++) {
1771                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
1772                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
1773               }
1774             }
1775             if (!valid_qr) {
1776               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1777               for (jj=0;jj<size_of_constraint;jj++) {
1778                 for (ii=0;ii<primal_dofs;ii++) {
1779                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
1780                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1781                   }
1782                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
1783                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
1784                   }
1785                 }
1786               }
1787             } else {
1788               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
1789             }
1790           }
1791           /* increment primal counter */
1792           primal_counter += primal_dofs;
1793         } else {
1794           if (pcbddc->dbg_flag) {
1795             ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
1796           }
1797         }
1798         /* increment constraint counter total_counts */
1799         total_counts += primal_dofs;
1800       }
1801       if (pcbddc->dbg_flag) {
1802         ierr = PetscFree(work);CHKERRQ(ierr);
1803       }
1804       /* free workspace */
1805       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
1806       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
1807       ierr = PetscFree(qr_work);CHKERRQ(ierr);
1808       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
1809       ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
1810       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
1811     }
1812     /* assembling */
1813     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1814     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1815     /*
1816     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
1817     */
1818   }
1819   if (pcbddc->dbg_flag) {
1820     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1821   }
1822   /* free workspace */
1823   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
1824   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
1825   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
1826   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
1827   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
1828   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
1829   ierr = PetscFree(local_to_B);CHKERRQ(ierr);
1830   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "PCBDDCAnalyzeInterface"
1836 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
1837 {
1838   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
1839   PC_IS       *pcis = (PC_IS*)pc->data;
1840   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
1841   PetscInt    bs,ierr,i,vertex_size;
1842   PetscViewer viewer=pcbddc->dbg_viewer;
1843 
1844   PetscFunctionBegin;
1845   /* Init local Graph struct */
1846   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
1847 
1848   /* Check validity of the csr graph passed in by the user */
1849   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
1850     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
1851   }
1852   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
1853   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
1854     Mat mat_adj;
1855     const PetscInt *xadj,*adjncy;
1856     PetscBool flg_row=PETSC_TRUE;
1857 
1858     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
1859     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1860     if (!flg_row) {
1861       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
1862     }
1863     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
1864     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
1865     if (!flg_row) {
1866       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
1867     }
1868     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
1869   }
1870 
1871   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting */
1872   vertex_size = 1;
1873   if (!pcbddc->n_ISForDofs) {
1874     IS *custom_ISForDofs;
1875 
1876     ierr = MatGetBlockSize(matis->A,&bs);CHKERRQ(ierr);
1877     ierr = PetscMalloc(bs*sizeof(IS),&custom_ISForDofs);CHKERRQ(ierr);
1878     for (i=0;i<bs;i++) {
1879       ierr = ISCreateStride(PETSC_COMM_SELF,pcis->n/bs,i,bs,&custom_ISForDofs[i]);CHKERRQ(ierr);
1880     }
1881     ierr = PCBDDCSetDofsSplitting(pc,bs,custom_ISForDofs);CHKERRQ(ierr);
1882     /* remove my references to IS objects */
1883     for (i=0;i<bs;i++) {
1884       ierr = ISDestroy(&custom_ISForDofs[i]);CHKERRQ(ierr);
1885     }
1886     ierr = PetscFree(custom_ISForDofs);CHKERRQ(ierr);
1887   } else { /* mat block size as vertex size (used for elasticity) */
1888     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
1889   }
1890 
1891   /* Setup of Graph */
1892   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundaries,pcbddc->DirichletBoundaries,pcbddc->n_ISForDofs,pcbddc->ISForDofs,pcbddc->user_primal_vertices);
1893 
1894   /* Graph's connected components analysis */
1895   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
1896 
1897   /* print some info to stdout */
1898   if (pcbddc->dbg_flag) {
1899     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
1906 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
1907 {
1908   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1909   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
1910   PetscErrorCode ierr;
1911 
1912   PetscFunctionBegin;
1913   n = 0;
1914   vertices = 0;
1915   if (pcbddc->ConstraintMatrix) {
1916     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
1917     for (i=0;i<local_primal_size;i++) {
1918       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1919       if (size_of_constraint == 1) n++;
1920       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1921     }
1922     if (vertices_idx) {
1923       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
1924       n = 0;
1925       for (i=0;i<local_primal_size;i++) {
1926         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1927         if (size_of_constraint == 1) {
1928           vertices[n++]=row_cmat_indices[0];
1929         }
1930         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1931       }
1932     }
1933   }
1934   *n_vertices = n;
1935   if (vertices_idx) *vertices_idx = vertices;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
1941 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
1942 {
1943   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
1944   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
1945   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
1946   PetscBT        touched;
1947   PetscErrorCode ierr;
1948 
1949     /* This function assumes that the number of local constraints per connected component
1950        is not greater than the number of nodes defined for the connected component
1951        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
1952   PetscFunctionBegin;
1953   n = 0;
1954   constraints_index = 0;
1955   if (pcbddc->ConstraintMatrix) {
1956     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
1957     max_size_of_constraint = 0;
1958     for (i=0;i<local_primal_size;i++) {
1959       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1960       if (size_of_constraint > 1) {
1961         n++;
1962       }
1963       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
1964       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
1965     }
1966     if (constraints_idx) {
1967       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
1968       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
1969       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
1970       n = 0;
1971       for (i=0;i<local_primal_size;i++) {
1972         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1973         if (size_of_constraint > 1) {
1974           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
1975           /* find first untouched local node */
1976           j = 0;
1977           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
1978           min_index = row_cmat_global_indices[j];
1979           min_loc = j;
1980           /* search the minimum among nodes not yet touched on the connected component
1981              since there can be more than one constraint on a single cc */
1982           for (j=1;j<size_of_constraint;j++) {
1983             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
1984               min_index = row_cmat_global_indices[j];
1985               min_loc = j;
1986             }
1987           }
1988           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
1989           constraints_index[n++] = row_cmat_indices[min_loc];
1990         }
1991         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
1992       }
1993       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
1994       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
1995     }
1996   }
1997   *n_constraints = n;
1998   if (constraints_idx) *constraints_idx = constraints_index;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 /* the next two functions has been adapted from pcis.c */
2003 #undef __FUNCT__
2004 #define __FUNCT__ "PCBDDCApplySchur"
2005 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2006 {
2007   PetscErrorCode ierr;
2008   PC_IS          *pcis = (PC_IS*)(pc->data);
2009 
2010   PetscFunctionBegin;
2011   if (!vec2_B) { vec2_B = v; }
2012   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2013   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2014   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2015   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2016   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2022 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2023 {
2024   PetscErrorCode ierr;
2025   PC_IS          *pcis = (PC_IS*)(pc->data);
2026 
2027   PetscFunctionBegin;
2028   if (!vec2_B) { vec2_B = v; }
2029   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2030   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2031   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2032   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2033   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "PCBDDCSubsetNumbering"
2039 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2040 {
2041   Vec            local_vec,global_vec;
2042   IS             seqis,paris;
2043   VecScatter     scatter_ctx;
2044   PetscScalar    *array;
2045   PetscInt       *temp_global_dofs;
2046   PetscScalar    globalsum;
2047   PetscInt       i,j,s;
2048   PetscInt       nlocals,first_index,old_index,max_local;
2049   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2050   PetscMPIInt    *dof_sizes,*dof_displs;
2051   PetscBool      first_found;
2052   PetscErrorCode ierr;
2053 
2054   PetscFunctionBegin;
2055   /* mpi buffers */
2056   MPI_Comm_size(comm,&size_prec_comm);
2057   MPI_Comm_rank(comm,&rank_prec_comm);
2058   j = ( !rank_prec_comm ? size_prec_comm : 0);
2059   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2060   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2061   /* get maximum size of subset */
2062   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2063   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2064   max_local = 0;
2065   if (n_local_dofs) {
2066     max_local = temp_global_dofs[0];
2067     for (i=1;i<n_local_dofs;i++) {
2068       if (max_local < temp_global_dofs[i] ) {
2069         max_local = temp_global_dofs[i];
2070       }
2071     }
2072   }
2073   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2074   max_global++;
2075   max_local = 0;
2076   if (n_local_dofs) {
2077     max_local = local_dofs[0];
2078     for (i=1;i<n_local_dofs;i++) {
2079       if (max_local < local_dofs[i] ) {
2080         max_local = local_dofs[i];
2081       }
2082     }
2083   }
2084   max_local++;
2085   /* allocate workspace */
2086   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2087   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2088   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2089   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2090   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2091   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2092   /* create scatter */
2093   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2094   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2095   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2096   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2097   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2098   /* init array */
2099   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2100   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2101   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2102   if (local_dofs_mult) {
2103     for (i=0;i<n_local_dofs;i++) {
2104       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2105     }
2106   } else {
2107     for (i=0;i<n_local_dofs;i++) {
2108       array[local_dofs[i]]=1.0;
2109     }
2110   }
2111   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2112   /* scatter into global vec and get total number of global dofs */
2113   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2114   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2116   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2117   /* Fill global_vec with cumulative function for global numbering */
2118   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2119   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2120   nlocals = 0;
2121   first_index = -1;
2122   first_found = PETSC_FALSE;
2123   for (i=0;i<s;i++) {
2124     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2125       first_found = PETSC_TRUE;
2126       first_index = i;
2127     }
2128     nlocals += (PetscInt)PetscRealPart(array[i]);
2129   }
2130   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2131   if (!rank_prec_comm) {
2132     dof_displs[0]=0;
2133     for (i=1;i<size_prec_comm;i++) {
2134       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2135     }
2136   }
2137   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2138   if (first_found) {
2139     array[first_index] += (PetscScalar)nlocals;
2140     old_index = first_index;
2141     for (i=first_index+1;i<s;i++) {
2142       if (PetscRealPart(array[i]) > 0.0) {
2143         array[i] += array[old_index];
2144         old_index = i;
2145       }
2146     }
2147   }
2148   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2149   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2150   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2151   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2152   /* get global ordering of local dofs */
2153   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2154   if (local_dofs_mult) {
2155     for (i=0;i<n_local_dofs;i++) {
2156       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2157     }
2158   } else {
2159     for (i=0;i<n_local_dofs;i++) {
2160       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2161     }
2162   }
2163   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2164   /* free workspace */
2165   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2166   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2167   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2168   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2169   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2170   /* return pointer to global ordering of local dofs */
2171   *global_numbering_subset = temp_global_dofs;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2177 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2178 {
2179   PetscInt       i,j;
2180   PetscScalar    *alphas;
2181   PetscErrorCode ierr;
2182 
2183   PetscFunctionBegin;
2184   /* this implements stabilized Gram-Schmidt */
2185   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2186   for (i=0;i<n;i++) {
2187     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2188     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2189     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2190   }
2191   ierr = PetscFree(alphas);CHKERRQ(ierr);
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 /* Currently support MAT_INITIAL_MATRIX only, with partial support to block matrices */
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatConvert_IS_AIJ"
2198 static PetscErrorCode MatConvert_IS_AIJ(Mat mat, MatType newtype,MatReuse reuse,Mat *M)
2199 {
2200   Mat new_mat;
2201   Mat_IS *matis = (Mat_IS*)(mat->data);
2202   /* info on mat */
2203   PetscInt bs,rows,cols;
2204   PetscInt lrows,lcols;
2205   PetscInt local_rows,local_cols;
2206   PetscMPIInt nsubdomains;
2207   /* preallocation */
2208   Vec vec_dnz,vec_onz;
2209   PetscScalar *my_dnz,*my_onz,*array;
2210   PetscInt *dnz,*onz,*mat_ranges,*row_ownership;
2211   PetscInt  index_row,index_col,owner;
2212   PetscInt  *local_indices,*global_indices;
2213   /* work */
2214   PetscInt i,j;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   /* CHECKS */
2219   /* get info from mat */
2220   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2221   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2222   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2223   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2224 
2225   /* MAT_INITIAL_MATRIX */
2226   ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2227   ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2228   ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2229   ierr = MatSetType(new_mat,newtype);CHKERRQ(ierr);
2230   ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2231   ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2232 
2233   /*
2234     preallocation
2235   */
2236 
2237   ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2238   /*
2239      Some vectors are needed to sum up properly on shared interface dofs.
2240      Note that preallocation is not exact, since it overestimates nonzeros
2241   */
2242 /*
2243   ierr = VecCreate(PetscObjectComm((PetscObject)mat),&vec_dnz);CHKERRQ(ierr);
2244   ierr = VecSetBlockSize(vec_dnz,bs);CHKERRQ(ierr);
2245   ierr = VecSetSizes(vec_dnz,PETSC_DECIDE,rows);CHKERRQ(ierr);
2246   ierr = VecSetType(vec_dnz,VECSTANDARD);CHKERRQ(ierr);
2247 */
2248   ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2249   ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2250   /* All processes need to compute entire row ownership */
2251   ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2252   ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2253   for (i=0;i<nsubdomains;i++) {
2254     for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2255       row_ownership[j]=i;
2256     }
2257   }
2258   /* map indices of local mat to global values */
2259   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2260   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2261   for (i=0;i<local_rows;i++) local_indices[i]=i;
2262   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2263   ierr = PetscFree(local_indices);CHKERRQ(ierr);
2264 
2265   /*
2266      my_dnz and my_onz contains exact contribution to preallocation from each local mat
2267      then, they will be summed up properly. This way, preallocation is always sufficient
2268   */
2269   ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2270   ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2271   ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2272   ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2273   for (i=0;i<local_rows;i++) {
2274     index_row = global_indices[i];
2275     for (j=i;j<local_rows;j++) {
2276       owner = row_ownership[index_row];
2277       index_col = global_indices[j];
2278       if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2279         my_dnz[i] += 1.0;
2280       } else { /* offdiag block */
2281         my_onz[i] += 1.0;
2282       }
2283       /* same as before, interchanging rows and cols */
2284       if (i != j) {
2285         owner = row_ownership[index_col];
2286         if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2287           my_dnz[j] += 1.0;
2288         } else {
2289           my_onz[j] += 1.0;
2290         }
2291       }
2292     }
2293   }
2294   ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2295   ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2296   if (local_rows) { /* multilevel guard */
2297     ierr = VecSetValues(vec_dnz,local_rows,global_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2298     ierr = VecSetValues(vec_onz,local_rows,global_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2299   }
2300   ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2301   ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2302   ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2303   ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2304   ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2305   ierr = PetscFree(my_onz);CHKERRQ(ierr);
2306   ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2307 
2308   /* set computed preallocation in dnz and onz */
2309   ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2310   for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2311   ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2312   ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2313   for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2314   ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2315   ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2316   ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2317 
2318   /* Resize preallocation if overestimated */
2319   for (i=0;i<lrows;i++) {
2320     dnz[i] = PetscMin(dnz[i],lcols);
2321     onz[i] = PetscMin(onz[i],cols-lcols);
2322   }
2323   ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2324   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2325 
2326   /*
2327     Set values. Very Basic.
2328   */
2329   for (i=0;i<local_rows;i++) {
2330     ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2331     ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2332     ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2333     ierr = MatSetValues(new_mat,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2334     ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2335   }
2336   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2337   ierr = PetscFree(global_indices);CHKERRQ(ierr);
2338   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339 
2340   /* get back mat */
2341   *M = new_mat;
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatISSubassemble_Private"
2347 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2348 {
2349   Mat             subdomain_adj;
2350   IS              new_ranks,ranks_send_to;
2351   MatPartitioning partitioner;
2352   Mat_IS          *matis;
2353   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2354   PetscInt        prank;
2355   PetscMPIInt     size,rank,color;
2356   PetscInt        *xadj,*adjncy,*oldranks;
2357   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2358   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2359   PetscErrorCode  ierr;
2360   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2361   PetscSubcomm    subcomm;
2362 
2363   PetscFunctionBegin;
2364   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2365   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2366   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2367 
2368   /* Get info on mapping */
2369   matis = (Mat_IS*)(mat->data);
2370   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2371   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2372 
2373   /* build local CSR graph of subdomains' connectivity */
2374   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2375   xadj[0] = 0;
2376   xadj[1] = PetscMax(n_neighs-1,0);
2377   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2378   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2379 
2380   if (threshold) {
2381     PetscInt* count,min_threshold;
2382     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2383     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2384     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2385       for (j=0;j<n_shared[i];j++) {
2386         count[shared[i][j]] += 1;
2387       }
2388     }
2389     /* adapt threshold since we dont want to lose significant connections */
2390     min_threshold = n_neighs;
2391     for (i=1;i<n_neighs;i++) {
2392       for (j=0;j<n_shared[i];j++) {
2393         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2394       }
2395     }
2396     PetscPrintf(PETSC_COMM_SELF,"PASSED THRESHOLD %d\n",threshold);
2397     threshold = PetscMax(min_threshold+1,threshold);
2398     PetscPrintf(PETSC_COMM_SELF,"USING THRESHOLD %d (min %d)\n",threshold,min_threshold);
2399     xadj[1] = 0;
2400     for (i=1;i<n_neighs;i++) {
2401       for (j=0;j<n_shared[i];j++) {
2402         if (count[shared[i][j]] < threshold) {
2403           adjncy[xadj[1]] = neighs[i];
2404           adjncy_wgt[xadj[1]] = n_shared[i];
2405           xadj[1]++;
2406           break;
2407         }
2408       }
2409     }
2410     ierr = PetscFree(count);CHKERRQ(ierr);
2411   } else {
2412     if (xadj[1]) {
2413       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2414       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2415     }
2416   }
2417   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2418   if (use_square) {
2419     for (i=0;i<xadj[1];i++) {
2420       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2421     }
2422   }
2423   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2424 
2425   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2426 
2427   /*
2428     Restrict work on active processes only.
2429   */
2430   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2431   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2432   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2433   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2434   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank,rank);CHKERRQ(ierr);
2435   if (color) {
2436     ierr = PetscFree(xadj);CHKERRQ(ierr);
2437     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2438     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2439   } else {
2440     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2441     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2442     prank = rank;
2443     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2444     for (i=0;i<size;i++) {
2445       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2446     }
2447     for (i=0;i<xadj[1];i++) {
2448       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2449     }
2450     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2451     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2452     n_subdomains = (PetscInt)size;
2453     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2454 
2455     /* Partition */
2456     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2457     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2458     if (use_vwgt) {
2459       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2460       v_wgt[0] = local_size;
2461       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2462     }
2463     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2464     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2465     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2466     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2467     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2468 
2469     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2470     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2471     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2472     /* clean up */
2473     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2474     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2475     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2476     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2477   }
2478   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2479 
2480   /* assemble parallel IS for sends */
2481   i = 1;
2482   if (color) i=0;
2483   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2484   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2485 
2486   /* get back IS */
2487   *is_sends = ranks_send_to;
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatISSubassemble"
2495 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2496 {
2497   Mat                    local_mat,new_mat;
2498   Mat_IS                 *matis;
2499   IS                     is_sends_internal;
2500   PetscInt               rows,cols;
2501   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2502   PetscBool              ismatis,isdense;
2503   ISLocalToGlobalMapping l2gmap;
2504   PetscInt*              l2gmap_indices;
2505   const PetscInt*        is_indices;
2506   MatType                new_local_type;
2507   MatTypePrivate         new_local_type_private;
2508   /* buffers */
2509   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2510   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2511   /* MPI */
2512   MPI_Comm               comm;
2513   PetscMPIInt            n_sends,n_recvs,commsize;
2514   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2515   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2516   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2517   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2518   PetscErrorCode         ierr;
2519 
2520   PetscFunctionBegin;
2521   /* checks */
2522   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2523   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2524   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2525   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2526   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2527   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2528   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2529   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2530   PetscValidLogicalCollectiveInt(mat,bs,0);
2531   /* prepare IS for sending if not provided */
2532   if (!is_sends) {
2533     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2534     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2535   } else {
2536     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2537     is_sends_internal = is_sends;
2538   }
2539 
2540   /* get pointer of MATIS data */
2541   matis = (Mat_IS*)mat->data;
2542 
2543   /* get comm */
2544   comm = PetscObjectComm((PetscObject)mat);
2545 
2546   /* compute number of sends */
2547   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2548   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2549 
2550   /* compute number of receives */
2551   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2552   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2553   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2554   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2555   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2556   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2557   ierr = PetscFree(iflags);CHKERRQ(ierr);
2558 
2559   /* prepare send/receive buffers */
2560   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2561   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2562   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2563   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2564 
2565   /* Get data from local mat */
2566   if (!isdense) {
2567     /* TODO: See below some guidelines on how to prepare the local buffers */
2568     /*
2569        send_buffer_vals should contain the raw values of the local matrix
2570        send_buffer_idxs should contain:
2571        - MatType_PRIVATE type
2572        - PetscInt        size_of_l2gmap
2573        - PetscInt        global_row_indices[size_of_l2gmap]
2574        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2575     */
2576   } else {
2577     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2578     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2579     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2580     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2581     send_buffer_idxs[1] = i;
2582     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2583     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2584     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2585     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2586     for (i=0;i<n_sends;i++) {
2587       ilengths_vals[is_indices[i]] = len*len;
2588       ilengths_idxs[is_indices[i]] = len+2;
2589     }
2590   }
2591   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2592   buf_size_idxs = 0;
2593   buf_size_vals = 0;
2594   for (i=0;i<n_recvs;i++) {
2595     buf_size_idxs += (PetscInt)olengths_idxs[i];
2596     buf_size_vals += (PetscInt)olengths_vals[i];
2597   }
2598   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2599   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2600 
2601   /* get new tags for clean communications */
2602   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2603   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2604 
2605   /* allocate for requests */
2606   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2607   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2608   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2609   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2610 
2611   /* communications */
2612   ptr_idxs = recv_buffer_idxs;
2613   ptr_vals = recv_buffer_vals;
2614   for (i=0;i<n_recvs;i++) {
2615     source_dest = onodes[i];
2616     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2617     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2618     ptr_idxs += olengths_idxs[i];
2619     ptr_vals += olengths_vals[i];
2620   }
2621   for (i=0;i<n_sends;i++) {
2622     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2623     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2624     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2625   }
2626   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2627   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2628 
2629   /* assemble new l2g map */
2630   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2631   ptr_idxs = recv_buffer_idxs;
2632   buf_size_idxs = 0;
2633   for (i=0;i<n_recvs;i++) {
2634     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2635     ptr_idxs += olengths_idxs[i];
2636   }
2637   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2638   ptr_idxs = recv_buffer_idxs;
2639   buf_size_idxs = 0;
2640   for (i=0;i<n_recvs;i++) {
2641     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
2642     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2643     ptr_idxs += olengths_idxs[i];
2644   }
2645   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
2646   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
2647   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
2648 
2649   /* infer new local matrix type from received local matrices type */
2650   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
2651   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
2652   new_local_type_private = MATAIJ_PRIVATE;
2653   new_local_type = MATSEQAIJ;
2654   if (n_recvs) {
2655     new_local_type_private = send_buffer_idxs[0];
2656     ptr_idxs = recv_buffer_idxs;
2657     for (i=0;i<n_recvs;i++) {
2658       if ((PetscInt)new_local_type_private != *ptr_idxs) {
2659         new_local_type_private = MATAIJ_PRIVATE;
2660         break;
2661       }
2662       ptr_idxs += olengths_idxs[i];
2663     }
2664     switch (new_local_type_private) {
2665       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
2666         new_local_type = MATSEQAIJ;
2667         bs = 1;
2668         break;
2669       case MATAIJ_PRIVATE:
2670         new_local_type = MATSEQAIJ;
2671         bs = 1;
2672         break;
2673       case MATBAIJ_PRIVATE:
2674         new_local_type = MATSEQBAIJ;
2675         break;
2676       case MATSBAIJ_PRIVATE:
2677         new_local_type = MATSEQSBAIJ;
2678         break;
2679       default:
2680         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
2681         break;
2682     }
2683   }
2684 
2685   /* create MATIS object */
2686   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2687   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
2688   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
2689   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
2690   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
2691   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
2692 
2693   /* set values */
2694   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2695   ptr_vals = recv_buffer_vals;
2696   ptr_idxs = recv_buffer_idxs;
2697   for (i=0;i<n_recvs;i++) {
2698     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
2699       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2700       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
2701       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2702       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
2703       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2704     }
2705     ptr_idxs += olengths_idxs[i];
2706     ptr_vals += olengths_vals[i];
2707   }
2708   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2709   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2711   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712 
2713   { /* check */
2714     Vec       lvec,rvec;
2715     PetscReal infty_error;
2716 
2717     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
2718     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
2719     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
2720     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
2721     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
2722     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2723     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
2724     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
2725     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
2726   }
2727 
2728   /* free workspace */
2729   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
2730   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
2731   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2732   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
2733   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2734   if (isdense) {
2735     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2736     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2737   } else {
2738     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
2739   }
2740   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
2741   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
2742   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
2743   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
2744   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
2745   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
2746   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
2747   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
2748   ierr = PetscFree(onodes);CHKERRQ(ierr);
2749   /* get back new mat */
2750   *mat_n = new_mat;
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
2756 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
2757 {
2758   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
2759   PC_IS                  *pcis = (PC_IS*)pc->data;
2760   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
2761   MatNullSpace           CoarseNullSpace=NULL;
2762   ISLocalToGlobalMapping coarse_islg;
2763   IS                     coarse_is;
2764   PetscInt               max_it,coarse_size,*local_primal_indices=NULL;
2765   PetscInt               im_active=-1,active_procs=-1;
2766   PC                     pc_temp;
2767   PCType                 coarse_pc_type;
2768   KSPType                coarse_ksp_type;
2769   PetscBool              multilevel_requested,multilevel_allowed;
2770   PetscBool              setsym,issym,isbddc,isnn;
2771   MatStructure           matstruct;
2772   PetscErrorCode         ierr;
2773 
2774   PetscFunctionBegin;
2775   /* Assign global numbering to coarse dofs */
2776   ierr = PCBDDCComputePrimalNumbering(pc,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
2777 
2778   /* infer some info from user */
2779   issym = PETSC_FALSE;
2780   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
2781   multilevel_allowed = PETSC_FALSE;
2782   multilevel_requested = PETSC_FALSE;
2783   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
2784   if (multilevel_requested) {
2785     /* count "active processes" */
2786     im_active = !!(pcis->n);
2787     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2788     if (active_procs/pcbddc->coarsening_ratio < 2) {
2789       multilevel_allowed = PETSC_FALSE;
2790     } else {
2791       multilevel_allowed = PETSC_TRUE;
2792     }
2793   }
2794 
2795   /* set defaults for coarse KSP and PC */
2796   if (multilevel_allowed) {
2797     if (issym) {
2798       coarse_ksp_type = KSPRICHARDSON;
2799     } else {
2800       coarse_ksp_type = KSPCHEBYSHEV;
2801     }
2802     coarse_pc_type = PCBDDC;
2803   } else {
2804     coarse_ksp_type = KSPPREONLY;
2805     coarse_pc_type = PCREDUNDANT;
2806   }
2807 
2808   /* create the coarse KSP object only once with defaults */
2809   if (!pcbddc->coarse_ksp) {
2810     char prefix[256],str_level[3];
2811     size_t len;
2812     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
2813     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
2814     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
2815     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2816     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2817     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2818     /* prefix */
2819     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
2820     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
2821     if (!pcbddc->current_level) {
2822       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
2823       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
2824     } else {
2825       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
2826       if (pcbddc->current_level>1) len -= 2;
2827       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
2828       *(prefix+len)='\0';
2829       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
2830       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
2831     }
2832     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
2833   }
2834   /* allow user customization */
2835   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2836   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
2837   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
2838 
2839   /* get some info after set from options */
2840   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
2841   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
2842   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
2843   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
2844     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
2845     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
2846     isbddc = PETSC_FALSE;
2847   }
2848 
2849   /* propagate BDDC info to the next level */
2850   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
2851   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2852   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
2853 
2854   /* creates temporary MATIS object for coarse matrix */
2855   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,local_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
2856   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
2857   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
2858   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,coarse_size,coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
2859   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
2860   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2861   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
2863   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
2864   ierr = PetscFree(local_primal_indices);CHKERRQ(ierr);
2865 
2866   /* assemble coarse matrix */
2867   if (isbddc || isnn) {
2868     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
2869   } else {
2870     ierr = MatConvert_IS_AIJ(coarse_mat_is,MATAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
2871   }
2872   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
2873 
2874   /* create local to global scatters for coarse problem */
2875   ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
2876   ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
2877   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
2878 
2879   /* propagate symmetry info to coarse matrix */
2880   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
2881 
2882   /* Compute coarse null space (special handling by BDDC only) */
2883   if (pcbddc->NullSpace) {
2884     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
2885     if (isbddc) {
2886       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
2887     } else {
2888       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
2889     }
2890   }
2891 
2892   /* set operators */
2893   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
2894   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
2895 
2896   /* additional KSP customization */
2897   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
2898   if (max_it < 5) {
2899     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
2900   }
2901   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
2902 
2903 
2904   /* print some info if requested */
2905   if (pcbddc->dbg_flag) {
2906     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
2907     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
2908     if (!multilevel_allowed) {
2909       if (multilevel_requested) {
2910         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
2911       } else if (pcbddc->max_levels) {
2912         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
2913       }
2914     }
2915     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
2916     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2917   }
2918 
2919   /* setup coarse ksp */
2920   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
2921   if (pcbddc->dbg_flag) {
2922     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
2923     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2924     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
2925     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2926   }
2927 
2928   /* Check coarse problem if requested */
2929   if (pcbddc->dbg_flag) {
2930     KSP       check_ksp;
2931     KSPType   check_ksp_type;
2932     PC        check_pc;
2933     Vec       check_vec;
2934     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
2935     PetscInt  its;
2936     PetscBool ispreonly,compute;
2937 
2938     /* Create ksp object suitable for estimation of extreme eigenvalues */
2939     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
2940     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
2941     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,coarse_size);CHKERRQ(ierr);
2942     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
2943     if (ispreonly) {
2944       check_ksp_type = KSPPREONLY;
2945       compute = PETSC_FALSE;
2946     } else {
2947       if (issym) check_ksp_type = KSPCG;
2948       else check_ksp_type = KSPGMRES;
2949       compute = PETSC_TRUE;
2950     }
2951     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
2952     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
2953     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
2954     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
2955     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
2956     /* create random vec */
2957     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
2958     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
2959     if (CoarseNullSpace) {
2960       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
2961     }
2962     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2963     /* solve coarse problem */
2964     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
2965     if (CoarseNullSpace) {
2966       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
2967     }
2968     /* check coarse problem residual error */
2969     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
2970     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
2971     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
2972     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
2973     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
2974     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
2975     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
2976     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
2977     /* get eigenvalue estimation if preonly has not been requested */
2978     if (!ispreonly) {
2979       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
2980       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
2981       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
2982     }
2983     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2984     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
2985   }
2986   /* free memory */
2987   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
2988   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 #undef __FUNCT__
2993 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
2994 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
2995 {
2996   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
2997   PC_IS*         pcis = (PC_IS*)pc->data;
2998   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
2999   PetscInt       i,j,coarse_size;
3000   PetscInt       *local_primal_indices,*auxlocal_primal,*aux_idx;
3001   PetscErrorCode ierr;
3002 
3003   PetscFunctionBegin;
3004   /* get indices in local ordering for vertices and constraints */
3005   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&auxlocal_primal);CHKERRQ(ierr);
3006   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_idx);CHKERRQ(ierr);
3007   ierr = PetscMemcpy(auxlocal_primal,aux_idx,i*sizeof(PetscInt));CHKERRQ(ierr);
3008   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3009   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_idx);CHKERRQ(ierr);
3010   ierr = PetscMemcpy(&auxlocal_primal[i],aux_idx,j*sizeof(PetscInt));CHKERRQ(ierr);
3011   ierr = PetscFree(aux_idx);CHKERRQ(ierr);
3012 
3013   /* Compute global number of coarse dofs */
3014   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,auxlocal_primal,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3015 
3016   /* check numbering */
3017   if (pcbddc->dbg_flag) {
3018     PetscScalar coarsesum,*array;
3019 
3020     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3021     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3022     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3023     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3024     for (i=0;i<pcbddc->local_primal_size;i++) {
3025       ierr = VecSetValue(pcis->vec1_N,auxlocal_primal[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3026     }
3027     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3028     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3029     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3030     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3031     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3032     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3033     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3034     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3035     for (i=0;i<pcis->n;i++) {
3036       if (array[i] == 1.0) {
3037         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3038       }
3039     }
3040     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3041     for (i=0;i<pcis->n;i++) {
3042       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3043     }
3044     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3045     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3046     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3047     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3048     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3049     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3050     if (pcbddc->dbg_flag > 1) {
3051       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3052       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3053       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3054       for (i=0;i<pcbddc->local_primal_size;i++) {
3055         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d \n",i,local_primal_indices[i]);
3056       }
3057       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3058     }
3059     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3060   }
3061   ierr = PetscFree(auxlocal_primal);CHKERRQ(ierr);
3062   /* get back data */
3063   *coarse_size_n = coarse_size;
3064   *local_primal_indices_n = local_primal_indices;
3065   PetscFunctionReturn(0);
3066 }
3067 
3068