xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 70cf5478945a9d09b9da66ef3d9730962f546e98)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_constraints,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
130   n_R = pcis->n-pcbddc->n_actual_vertices;
131   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
132   /* local work vectors (try to avoid unneeded work)*/
133   /* R nodes */
134   old_size = -1;
135   if (pcbddc->vec1_R) {
136     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
137   }
138   if (n_R != old_size) {
139     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
140     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
141     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
143     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
144     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
145   }
146   /* local primal dofs */
147   old_size = -1;
148   if (pcbddc->vec1_P) {
149     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
150   }
151   if (pcbddc->local_primal_size != old_size) {
152     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
153     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
155     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
156   }
157   /* local explicit constraints */
158   old_size = -1;
159   if (pcbddc->vec1_C) {
160     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
161   }
162   if (n_constraints && n_constraints != old_size) {
163     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
164     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
166     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCBDDCSetUpCorrection"
173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
174 {
175   PetscErrorCode         ierr;
176   /* pointers to pcis and pcbddc */
177   PC_IS*                 pcis = (PC_IS*)pc->data;
178   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
179   /* submatrices of local problem */
180   Mat                    A_RV,A_VR,A_VV;
181   /* working matrices */
182   Mat                    M1,M2,M3,C_CR;
183   /* working vectors */
184   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
185   /* additional working stuff */
186   IS                     is_aux;
187   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
188   const PetscScalar      *array,*row_cmat_values;
189   const PetscInt         *row_cmat_indices,*idx_R_local;
190   PetscInt               *idx_V_B,*auxindices;
191   PetscInt               n_vertices,n_constraints,size_of_constraint;
192   PetscInt               i,j,n_R,n_D,n_B;
193   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
194   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
195   MatType                impMatType;
196   /* some shortcuts to scalars */
197   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
198   /* for debugging purposes */
199   PetscReal              *coarsefunctions_errors,*constraints_errors;
200 
201   PetscFunctionBegin;
202   /* get number of vertices (corners plus constraints with change of basis)
203      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
204   n_vertices = pcbddc->n_actual_vertices;
205   n_constraints = pcbddc->local_primal_size-n_vertices;
206   /* Set Non-overlapping dimensions */
207   n_B = pcis->n_B; n_D = pcis->n - n_B;
208   n_R = pcis->n-n_vertices;
209 
210   /* Set types for local objects needed by BDDC precondtioner */
211   impMatType = MATSEQDENSE;
212 
213   /* Allocating some extra storage just to be safe */
214   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
215   for (i=0;i<pcis->n;i++) auxindices[i]=i;
216 
217   /* vertices in boundary numbering */
218   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
219   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
220   if (i != n_vertices) {
221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
222   }
223 
224   /* Precompute stuffs needed for preprocessing and application of BDDC*/
225   if (n_constraints) {
226     /* see if we can save some allocations */
227     if (pcbddc->local_auxmat2) {
228       PetscInt on_R,on_constraints;
229       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
230       if (on_R != n_R || on_constraints != n_constraints) {
231         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
232         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
233       }
234     }
235     /* work vectors */
236     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
238     /* auxiliary matrices */
239     if (!pcbddc->local_auxmat2) {
240       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
241       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
242       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
243       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
244     }
245 
246     /* Extract constraints on R nodes: C_{CR}  */
247     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
248     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
249     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
250 
251     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
252     for (i=0;i<n_constraints;i++) {
253       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
254       /* Get row of constraint matrix in R numbering */
255       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
256       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
257       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
259       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
260       /* Solve for row of constraint matrix in R numbering */
261       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
262       /* Set values in local_auxmat2 */
263       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
264       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
265       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266     }
267     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
268     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
270 
271     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
272     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
273     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
274     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
275     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
276     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
277     ierr = MatSetUp(M1);CHKERRQ(ierr);
278     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
279     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
280     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
281     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
282     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
283     ierr = MatDestroy(&M2);CHKERRQ(ierr);
284     ierr = MatDestroy(&M3);CHKERRQ(ierr);
285     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
286     if (!pcbddc->local_auxmat1) {
287       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
288     } else {
289       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     }
291   }
292 
293   /* Get submatrices from subdomain matrix */
294   if (n_vertices) {
295     PetscInt ibs,mbs;
296     PetscBool issbaij;
297     Mat newmat;
298 
299     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
300     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
301     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
302     if (ibs != mbs) { /* need to convert to SEQAIJ */
303       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
304       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
307       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
308     } else {
309       /* this is safe */
310       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
312       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
313         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
314         /* which of the two approaches is faster? */
315         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
316         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
317         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
319         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
320       } else {
321         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
322         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323       }
324     }
325     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
326     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
327     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
328   }
329 
330   /* Matrix of coarse basis functions (local) */
331   if (pcbddc->coarse_phi_B) {
332     PetscInt on_B,on_primal;
333     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
334     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
335       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
336       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
337     }
338   }
339   if (pcbddc->coarse_phi_D) {
340     PetscInt on_D,on_primal;
341     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
342     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
343       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
344       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
345     }
346   }
347   if (!pcbddc->coarse_phi_B) {
348     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
349     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
350     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
351     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
352   }
353   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
354     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
355     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
356     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
357     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
358   }
359 
360   if (pcbddc->dbg_flag) {
361     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
362     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
364   }
365   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
366   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
367 
368   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
369 
370   /* vertices */
371   for (i=0;i<n_vertices;i++) {
372     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
373     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
374     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
375     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
376     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
377     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
378     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
379     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
382     if (n_constraints) {
383       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
384       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
386     }
387     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
388     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
389 
390     /* Set values in coarse basis function and subdomain part of coarse_mat */
391     /* coarse basis functions */
392     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
393     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
394     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
396     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
397     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
399     if (pcbddc->switch_static || pcbddc->dbg_flag) {
400       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
403       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405     }
406     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
407     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
408     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
409     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     if (n_constraints) {
411       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
412       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
413       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
414     }
415 
416     /* check */
417     if (pcbddc->dbg_flag) {
418       /* assemble subdomain vector on local nodes */
419       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
421       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
429       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       if (n_constraints) {
432         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
433         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
434         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
435       }
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
449     }
450   }
451 
452   /* constraints */
453   for (i=0;i<n_constraints;i++) {
454     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
455     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
456     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
457     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
458     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
459     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
460     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
461     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
462     if (n_vertices) {
463       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
464     }
465     /* Set values in coarse basis function and subdomain part of coarse_mat */
466     /* coarse basis functions */
467     j = i+n_vertices; /* don't touch this! */
468     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
469     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
472     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
473     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     if (pcbddc->switch_static || pcbddc->dbg_flag) {
475       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
478       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
479       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480     }
481     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
482     if (n_vertices) {
483       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
484       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
485       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
486     }
487     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
488     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
489     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
490 
491     if (pcbddc->dbg_flag) {
492       /* assemble subdomain vector on nodes */
493       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
494       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
495       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
496       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
498       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
499       /* assemble subdomain vector of lagrange multipliers */
500       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
501       if (n_vertices) {
502         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
503         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
504         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
505       }
506       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
507       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
508       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
510       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
512       /* check saddle point solution */
513       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
514       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
516       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
517       /* shift by the identity matrix */
518       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
522     }
523   }
524   /* call assembling routines for local coarse basis */
525   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   if (pcbddc->switch_static || pcbddc->dbg_flag) {
528     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   }
531 
532   /* compute other basis functions for non-symmetric problems */
533   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
534   if (!setsym || (setsym && !issym)) {
535     if (!pcbddc->coarse_psi_B) {
536       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
537       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
538       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
539       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
540     }
541     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
542       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
543       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
544       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
545       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
546     }
547     for (i=0;i<pcbddc->local_primal_size;i++) {
548       if (n_constraints) {
549         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
550         for (j=0;j<n_constraints;j++) {
551           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
552         }
553         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
554         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
555       }
556       if (i<n_vertices) {
557         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
558         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
559         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
560         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
561         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
562         if (n_constraints) {
563           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
564         }
565       } else {
566         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
567       }
568       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
569       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
570       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
573       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
574       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       if (i<n_vertices) {
576         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
577       }
578       if (pcbddc->switch_static || pcbddc->dbg_flag) {
579         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
582         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
583         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584       }
585 
586       if (pcbddc->dbg_flag) {
587         /* assemble subdomain vector on nodes */
588         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
589         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
590         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
591         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         if (i<n_vertices) {
593           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
594         }
595         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
596         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
597         /* assemble subdomain vector of lagrange multipliers */
598         for (j=0;j<pcbddc->local_primal_size;j++) {
599           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
600         }
601         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
602         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
603         /* check saddle point solution */
604         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
605         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
607         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
608         /* shift by the identity matrix */
609         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
610         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
613       }
614     }
615     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     if (pcbddc->switch_static || pcbddc->dbg_flag) {
618       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     }
621   }
622   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
623   /* Checking coarse_sub_mat and coarse basis functios */
624   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
625   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626   if (pcbddc->dbg_flag) {
627     Mat         coarse_sub_mat;
628     Mat         AUXMAT,TM1,TM2,TM3,TM4;
629     Mat         coarse_phi_D,coarse_phi_B;
630     Mat         coarse_psi_D,coarse_psi_B;
631     Mat         A_II,A_BB,A_IB,A_BI;
632     MatType     checkmattype=MATSEQAIJ;
633     PetscReal   real_value;
634 
635     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
636     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
639     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
640     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
641     if (pcbddc->coarse_psi_B) {
642       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
643       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
644     }
645     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
646 
647     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
648     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
649     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
650     if (pcbddc->coarse_psi_B) {
651       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
652       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
653       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
654       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
655       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
656       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
657       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
658       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
659       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
660       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663     } else {
664       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
665       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
667       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
668       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
669       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672     }
673     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
674     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
679     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
684     for (i=0;i<pcbddc->local_primal_size;i++) {
685       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
686     }
687     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
688     for (i=0;i<pcbddc->local_primal_size;i++) {
689       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
690     }
691     if (pcbddc->coarse_psi_B) {
692       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
693       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
694         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
695       }
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
697       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
698         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
699       }
700     }
701     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
702     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
706     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
710     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
711     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
712     if (pcbddc->coarse_psi_B) {
713       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
714       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
715     }
716     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
717     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
718     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
719     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
720   }
721   /* free memory */
722   if (n_vertices) {
723     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
724     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
725     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
728   }
729   if (n_constraints) {
730     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
731     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
732     ierr = MatDestroy(&M1);CHKERRQ(ierr);
733     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
734   }
735   ierr = PetscFree(auxindices);CHKERRQ(ierr);
736   /* get back data */
737   *coarse_submat_vals_n = coarse_submat_vals;
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
743 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
744 {
745   PC_IS*            pcis = (PC_IS*)(pc->data);
746   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
747   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
748   PetscBool         issbaij,isbaij;
749   /* manage repeated solves */
750   MatReuse          reuse;
751   MatStructure      matstruct;
752   PetscErrorCode    ierr;
753 
754   PetscFunctionBegin;
755   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
756     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
757   }
758   /* get mat flags */
759   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
760   reuse = MAT_INITIAL_MATRIX;
761   if (pc->setupcalled) {
762     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
763     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
764     if (matstruct == SAME_NONZERO_PATTERN) {
765       reuse = MAT_REUSE_MATRIX;
766     } else {
767       reuse = MAT_INITIAL_MATRIX;
768     }
769   }
770   if (reuse == MAT_INITIAL_MATRIX) {
771     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
772     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
773     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
774     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
776   }
777 
778   /* transform local matrices if needed */
779   if (pcbddc->use_change_of_basis) {
780     Mat         change_mat_all;
781     PetscScalar *row_cmat_values;
782     PetscInt    *row_cmat_indices;
783     PetscInt    *nnz,*is_indices,*temp_indices;
784     PetscInt    i,j,k,n_D,n_B;
785 
786     /* Get Non-overlapping dimensions */
787     n_B = pcis->n_B;
788     n_D = pcis->n-n_B;
789 
790     /* compute nonzero structure of change of basis on all local nodes */
791     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
792     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
793     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
794     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     k=1;
797     for (i=0;i<n_B;i++) {
798       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
799       nnz[is_indices[i]]=j;
800       if (k < j) k = j;
801       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802     }
803     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
804     /* assemble change of basis matrix on the whole set of local dofs */
805     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
806     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
807     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
808     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
809     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
810     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     for (i=0;i<n_D;i++) {
812       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
813     }
814     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816     for (i=0;i<n_B;i++) {
817       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
818       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
819       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
820       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821     }
822     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
825     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
826     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
827     if (!issbaij && !isbaij) {
828       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
829     } else {
830       Mat work_mat;
831       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
832       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
833       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
834     }
835     /*
836     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
837     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
838     */
839     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
840     ierr = PetscFree(nnz);CHKERRQ(ierr);
841     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
842   } else {
843     /* without change of basis, the local matrix is unchanged */
844     if (!pcbddc->local_mat) {
845       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
846       pcbddc->local_mat = matis->A;
847     }
848   }
849 
850   /* get submatrices */
851   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
852   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
853   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
854   if (!issbaij) {
855     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
856     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
857   } else {
858     Mat newmat;
859     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
861     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
862     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
869 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
870 {
871   PC_IS*         pcis = (PC_IS*)(pc->data);
872   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
873   IS             is_aux1,is_aux2;
874   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
875   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
876   PetscInt       vbs,bs;
877   PetscBT        bitmask;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   /* No need to setup local scatters if primal space is unchanged */
882   if (!pcbddc->new_primal_space_local && !pcbddc->dbg_flag) {
883     PetscFunctionReturn(0);
884   }
885   /* destroy old objects */
886   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
887   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
888   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
889   /* Set Non-overlapping dimensions */
890   n_B = pcis->n_B; n_D = pcis->n - n_B;
891   n_vertices = pcbddc->n_actual_vertices;
892   /* create auxiliary bitmask */
893   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
894   for (i=0;i<n_vertices;i++) {
895     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
896   }
897 
898   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
899   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
900   for (i=0, n_R=0; i<pcis->n; i++) {
901     if (!PetscBTLookup(bitmask,i)) {
902       idx_R_local[n_R] = i;
903       n_R++;
904     }
905   }
906 
907   /* Block code */
908   vbs = 1;
909   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
910   if (bs>1 && !(n_vertices%bs)) {
911     PetscBool is_blocked = PETSC_TRUE;
912     PetscInt  *vary;
913     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
914     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
915     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
916     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
917     for (i=0; i<n_vertices; i++) {
918       if (vary[i]!=0 && vary[i]!=bs) {
919         is_blocked = PETSC_FALSE;
920         break;
921       }
922     }
923     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
924       vbs = bs;
925       for (i=0;i<n_R/vbs;i++) {
926         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
927       }
928     }
929     ierr = PetscFree(vary);CHKERRQ(ierr);
930   }
931   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
932   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
933 
934   /* print some info if requested */
935   if (pcbddc->dbg_flag) {
936     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
937     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
938     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
939     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
940     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
941     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
942     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944   }
945 
946   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
947   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
948   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
949   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
950   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
951   for (i=0; i<n_D; i++) {
952     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
953   }
954   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
955   for (i=0, j=0; i<n_R; i++) {
956     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
957       aux_array1[j++] = i;
958     }
959   }
960   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
961   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
962   for (i=0, j=0; i<n_B; i++) {
963     if (!PetscBTLookup(bitmask,is_indices[i])) {
964       aux_array2[j++] = i;
965     }
966   }
967   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
969   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
970   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
971   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
972 
973   if (pcbddc->switch_static || pcbddc->dbg_flag) {
974     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
975     for (i=0, j=0; i<n_R; i++) {
976       if (PetscBTLookup(bitmask,idx_R_local[i])) {
977         aux_array1[j++] = i;
978       }
979     }
980     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
981     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
982     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
983   }
984   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
985   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
992 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
993 {
994   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
995   PC_IS          *pcis = (PC_IS*)pc->data;
996   PC             pc_temp;
997   Mat            A_RR;
998   MatStructure   matstruct;
999   MatReuse       reuse;
1000   PetscScalar    m_one = -1.0;
1001   PetscReal      value;
1002   PetscInt       n_D,n_R,ibs,mbs;
1003   PetscBool      use_exact,use_exact_reduced,issbaij;
1004   PetscErrorCode ierr;
1005   /* prefixes stuff */
1006   char           dir_prefix[256],neu_prefix[256],str_level[3];
1007   size_t         len;
1008 
1009   PetscFunctionBegin;
1010   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1011 
1012   /* compute prefixes */
1013   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1014   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1015   if (!pcbddc->current_level) {
1016     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1017     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1018     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1019     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1020   } else {
1021     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1022     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1023     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1024     len -= 15; /* remove "pc_bddc_coarse_" */
1025     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1026     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1027     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1028     *(dir_prefix+len)='\0';
1029     *(neu_prefix+len)='\0';
1030     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1031     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1032     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1033     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1034   }
1035 
1036   /* DIRICHLET PROBLEM */
1037   /* Matrix for Dirichlet problem is pcis->A_II */
1038   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1039   if (!pcbddc->ksp_D) { /* create object if not yet build */
1040     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1041     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1042     /* default */
1043     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1044     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1045     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1046     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1047     if (issbaij) {
1048       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1049     } else {
1050       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1051     }
1052     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1053   }
1054   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1055   /* Allow user's customization */
1056   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1057   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1058   if (!n_D) {
1059     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1060     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1061   }
1062   /* Set Up KSP for Dirichlet problem of BDDC */
1063   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1064   /* set ksp_D into pcis data */
1065   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1066   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1067   pcis->ksp_D = pcbddc->ksp_D;
1068 
1069   /* NEUMANN PROBLEM */
1070   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1071   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1072   if (pcbddc->ksp_R) { /* already created ksp */
1073     PetscInt nn_R;
1074     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1075     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1076     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1077     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1078       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1079       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1080       reuse = MAT_INITIAL_MATRIX;
1081     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1082       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1083         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1084         reuse = MAT_INITIAL_MATRIX;
1085       } else { /* safe to reuse the matrix */
1086         reuse = MAT_REUSE_MATRIX;
1087       }
1088     }
1089     /* last check */
1090     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1091       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1092       reuse = MAT_INITIAL_MATRIX;
1093     }
1094   } else { /* first time, so we need to create the matrix */
1095     reuse = MAT_INITIAL_MATRIX;
1096   }
1097   /* extract A_RR */
1098   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1099   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1100   if (ibs != mbs) {
1101     Mat newmat;
1102     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1103     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1104     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1105   } else {
1106     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1107   }
1108   if (!pcbddc->ksp_R) { /* create object if not present */
1109     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1110     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1111     /* default */
1112     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1113     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1114     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1115     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1116     if (issbaij) {
1117       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1118     } else {
1119       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1120     }
1121     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1122   }
1123   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1124   /* Allow user's customization */
1125   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1126   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1127   if (!n_R) {
1128     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1129     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1130   }
1131   /* Set Up KSP for Neumann problem of BDDC */
1132   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1133 
1134   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1135   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1136     /* Dirichlet */
1137     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1138     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1139     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1140     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1141     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1142     /* need to be adapted? */
1143     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1144     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1145     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1146     /* print info */
1147     if (pcbddc->dbg_flag) {
1148       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1149       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1150       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1152       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1153       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1154     }
1155     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1156       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1157     }
1158 
1159     /* Neumann */
1160     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1161     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1162     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1163     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1164     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1165     /* need to be adapted? */
1166     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1167     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1168     /* print info */
1169     if (pcbddc->dbg_flag) {
1170       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1171       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1172     }
1173     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1174       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1175     }
1176   }
1177   /* free Neumann problem's matrix */
1178   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1184 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1185 {
1186   PetscErrorCode ierr;
1187   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1188 
1189   PetscFunctionBegin;
1190   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1191   if (pcbddc->local_auxmat1) {
1192     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1193     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1200 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1201 {
1202   PetscErrorCode ierr;
1203   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1204   PC_IS*            pcis = (PC_IS*)  (pc->data);
1205   const PetscScalar zero = 0.0;
1206 
1207   PetscFunctionBegin;
1208   /* Application of PHI^T (or PSI^T)  */
1209   if (pcbddc->coarse_psi_B) {
1210     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1211     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1212   } else {
1213     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1214     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1215   }
1216   /* Scatter data of coarse_rhs */
1217   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1218   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219 
1220   /* Local solution on R nodes */
1221   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1222   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   if (pcbddc->switch_static) {
1225     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1227   }
1228   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1229   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1230   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   if (pcbddc->switch_static) {
1233     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235   }
1236 
1237   /* Coarse solution */
1238   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1240     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1241   }
1242   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1243   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1244 
1245   /* Sum contributions from two levels */
1246   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1247   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1253 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1254 {
1255   PetscErrorCode ierr;
1256   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1257 
1258   PetscFunctionBegin;
1259   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1265 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1266 {
1267   PetscErrorCode ierr;
1268   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1269 
1270   PetscFunctionBegin;
1271   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /* uncomment for testing purposes */
1276 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1279 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1280 {
1281   PetscErrorCode    ierr;
1282   PC_IS*            pcis = (PC_IS*)(pc->data);
1283   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1284   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1285   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1286   MatType           impMatType=MATSEQAIJ;
1287   /* one and zero */
1288   PetscScalar       one=1.0,zero=0.0;
1289   /* space to store constraints and their local indices */
1290   PetscScalar       *temp_quadrature_constraint;
1291   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1292   /* iterators */
1293   PetscInt          i,j,k,total_counts,temp_start_ptr;
1294   /* stuff to store connected components stored in pcbddc->mat_graph */
1295   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1296   PetscInt          n_ISForFaces,n_ISForEdges;
1297   /* near null space stuff */
1298   MatNullSpace      nearnullsp;
1299   const Vec         *nearnullvecs;
1300   Vec               *localnearnullsp;
1301   PetscBool         nnsp_has_cnst;
1302   PetscInt          nnsp_size;
1303   PetscScalar       *array;
1304   /* BLAS integers */
1305   PetscBLASInt      lwork,lierr;
1306   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1307   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1308   /* LAPACK working arrays for SVD or POD */
1309   PetscBool         skip_lapack;
1310   PetscScalar       *work;
1311   PetscReal         *singular_vals;
1312 #if defined(PETSC_USE_COMPLEX)
1313   PetscReal         *rwork;
1314 #endif
1315 #if defined(PETSC_MISSING_LAPACK_GESVD)
1316   PetscBLASInt      Blas_one_2=1;
1317   PetscScalar       *temp_basis,*correlation_mat;
1318 #else
1319   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1320   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1321 #endif
1322   /* reuse */
1323   PetscInt          olocal_primal_size;
1324   PetscInt          *oprimal_indices_local_idxs;
1325   /* change of basis */
1326   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1327   PetscBool         boolforchange,qr_needed;
1328   PetscBT           touched,change_basis,qr_needed_idx;
1329   /* auxiliary stuff */
1330   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1331   /* some quantities */
1332   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1333   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1334 
1335 
1336   PetscFunctionBegin;
1337   /* Destroy Mat objects computed previously */
1338   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1339   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1340   /* Get index sets for faces, edges and vertices from graph */
1341   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1342     pcbddc->use_vertices = PETSC_TRUE;
1343   }
1344   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1345   /* HACK: provide functions to set change of basis */
1346   if (!ISForVertices && pcbddc->NullSpace) {
1347     pcbddc->use_change_of_basis = PETSC_TRUE;
1348     pcbddc->use_change_on_faces = PETSC_FALSE;
1349   }
1350   /* print some info */
1351   if (pcbddc->dbg_flag) {
1352     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1353     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1354     i = 0;
1355     if (ISForVertices) {
1356       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1359     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1361     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1362   }
1363   /* check if near null space is attached to global mat */
1364   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1365   if (nearnullsp) {
1366     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1367     /* remove any stored info */
1368     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1369     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1370     /* store information for BDDC solver reuse */
1371     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1372     pcbddc->onearnullspace = nearnullsp;
1373     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1374     for (i=0;i<nnsp_size;i++) {
1375       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1376     }
1377   } else { /* if near null space is not provided BDDC uses constants by default */
1378     nnsp_size = 0;
1379     nnsp_has_cnst = PETSC_TRUE;
1380   }
1381   /* get max number of constraints on a single cc */
1382   max_constraints = nnsp_size;
1383   if (nnsp_has_cnst) max_constraints++;
1384 
1385   /*
1386        Evaluate maximum storage size needed by the procedure
1387        - temp_indices will contain start index of each constraint stored as follows
1388        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1389        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1390        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1391                                                                                                                                                          */
1392   total_counts = n_ISForFaces+n_ISForEdges;
1393   total_counts *= max_constraints;
1394   n_vertices = 0;
1395   if (ISForVertices) {
1396     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1397   }
1398   total_counts += n_vertices;
1399   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1400   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1401   total_counts = 0;
1402   max_size_of_constraint = 0;
1403   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1404     if (i<n_ISForEdges) {
1405       used_IS = &ISForEdges[i];
1406     } else {
1407       used_IS = &ISForFaces[i-n_ISForEdges];
1408     }
1409     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1410     total_counts += j;
1411     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1412   }
1413   total_counts *= max_constraints;
1414   total_counts += n_vertices;
1415   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1416   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1417   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1418   /* get local part of global near null space vectors */
1419   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1420   for (k=0;k<nnsp_size;k++) {
1421     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1422     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424   }
1425 
1426   /* whether or not to skip lapack calls */
1427   skip_lapack = PETSC_TRUE;
1428   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1429 
1430   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1431   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1432     PetscScalar temp_work;
1433 #if defined(PETSC_MISSING_LAPACK_GESVD)
1434     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1435     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1436     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1437     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1438 #if defined(PETSC_USE_COMPLEX)
1439     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1440 #endif
1441     /* now we evaluate the optimal workspace using query with lwork=-1 */
1442     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1443     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1444     lwork = -1;
1445     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1446 #if !defined(PETSC_USE_COMPLEX)
1447     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1448 #else
1449     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1450 #endif
1451     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1452     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1453 #else /* on missing GESVD */
1454     /* SVD */
1455     PetscInt max_n,min_n;
1456     max_n = max_size_of_constraint;
1457     min_n = max_constraints;
1458     if (max_size_of_constraint < max_constraints) {
1459       min_n = max_size_of_constraint;
1460       max_n = max_constraints;
1461     }
1462     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1463 #if defined(PETSC_USE_COMPLEX)
1464     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1465 #endif
1466     /* now we evaluate the optimal workspace using query with lwork=-1 */
1467     lwork = -1;
1468     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1469     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1470     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1471     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1472 #if !defined(PETSC_USE_COMPLEX)
1473     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1474 #else
1475     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1476 #endif
1477     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1478     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1479 #endif /* on missing GESVD */
1480     /* Allocate optimal workspace */
1481     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1482     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1483   }
1484   /* Now we can loop on constraining sets */
1485   total_counts = 0;
1486   temp_indices[0] = 0;
1487   /* vertices */
1488   if (ISForVertices) {
1489     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1490     if (nnsp_has_cnst) { /* consider all vertices */
1491       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1492       for (i=0;i<n_vertices;i++) {
1493         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1494         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1495         total_counts++;
1496       }
1497     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1498       PetscBool used_vertex;
1499       for (i=0;i<n_vertices;i++) {
1500         used_vertex = PETSC_FALSE;
1501         k = 0;
1502         while (!used_vertex && k<nnsp_size) {
1503           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1504           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1505             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1506             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1507             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1508             total_counts++;
1509             used_vertex = PETSC_TRUE;
1510           }
1511           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1512           k++;
1513         }
1514       }
1515     }
1516     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1517     n_vertices = total_counts;
1518   }
1519 
1520   /* edges and faces */
1521   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1522     if (i<n_ISForEdges) {
1523       used_IS = &ISForEdges[i];
1524       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1525     } else {
1526       used_IS = &ISForFaces[i-n_ISForEdges];
1527       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1528     }
1529     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1530     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1531     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1532     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1533     /* change of basis should not be performed on local periodic nodes */
1534     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1535     if (nnsp_has_cnst) {
1536       PetscScalar quad_value;
1537       temp_constraints++;
1538       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1539       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1540       for (j=0;j<size_of_constraint;j++) {
1541         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1542       }
1543       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1544       total_counts++;
1545     }
1546     for (k=0;k<nnsp_size;k++) {
1547       PetscReal real_value;
1548       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1549       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1550       for (j=0;j<size_of_constraint;j++) {
1551         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1552       }
1553       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1554       /* check if array is null on the connected component */
1555       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1556       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1557       if (real_value > 0.0) { /* keep indices and values */
1558         temp_constraints++;
1559         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1560         total_counts++;
1561       }
1562     }
1563     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1564     valid_constraints = temp_constraints;
1565     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1566     if (!pcbddc->use_nnsp_true && temp_constraints) {
1567       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1568 
1569 #if defined(PETSC_MISSING_LAPACK_GESVD)
1570       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1571          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1572          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1573             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1574             from that computed using LAPACKgesvd
1575          -> This is due to a different computation of eigenvectors in LAPACKheev
1576          -> The quality of the POD-computed basis will be the same */
1577       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1578       /* Store upper triangular part of correlation matrix */
1579       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1580       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1581       for (j=0;j<temp_constraints;j++) {
1582         for (k=0;k<j+1;k++) {
1583           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1584         }
1585       }
1586       /* compute eigenvalues and eigenvectors of correlation matrix */
1587       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1588       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1589 #if !defined(PETSC_USE_COMPLEX)
1590       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1591 #else
1592       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1593 #endif
1594       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1595       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1596       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1597       j=0;
1598       while (j < temp_constraints && singular_vals[j] < tol) j++;
1599       total_counts=total_counts-j;
1600       valid_constraints = temp_constraints-j;
1601       /* scale and copy POD basis into used quadrature memory */
1602       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1603       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1604       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1605       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1606       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1607       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1608       if (j<temp_constraints) {
1609         PetscInt ii;
1610         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1611         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1612         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1613         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1614         for (k=0;k<temp_constraints-j;k++) {
1615           for (ii=0;ii<size_of_constraint;ii++) {
1616             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1617           }
1618         }
1619       }
1620 #else  /* on missing GESVD */
1621       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1622       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1623       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1624       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1625 #if !defined(PETSC_USE_COMPLEX)
1626       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1627 #else
1628       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1629 #endif
1630       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1631       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1632       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1633       k = temp_constraints;
1634       if (k > size_of_constraint) k = size_of_constraint;
1635       j = 0;
1636       while (j < k && singular_vals[k-j-1] < tol) j++;
1637       total_counts = total_counts-temp_constraints+k-j;
1638       valid_constraints = k-j;
1639 #endif /* on missing GESVD */
1640     }
1641     /* setting change_of_basis flag is safe now */
1642     if (boolforchange) {
1643       for (j=0;j<valid_constraints;j++) {
1644         PetscBTSet(change_basis,total_counts-j-1);
1645       }
1646     }
1647   }
1648   /* free index sets of faces, edges and vertices */
1649   for (i=0;i<n_ISForFaces;i++) {
1650     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1651   }
1652   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1653   for (i=0;i<n_ISForEdges;i++) {
1654     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1655   }
1656   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1657   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1658   /* map temp_indices_to_constraint in boundary numbering */
1659   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1660   if (i != temp_indices[total_counts]) {
1661     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1662   }
1663 
1664   /* free workspace */
1665   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1666     ierr = PetscFree(work);CHKERRQ(ierr);
1667 #if defined(PETSC_USE_COMPLEX)
1668     ierr = PetscFree(rwork);CHKERRQ(ierr);
1669 #endif
1670     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1671 #if defined(PETSC_MISSING_LAPACK_GESVD)
1672     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1673     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1674 #endif
1675   }
1676   for (k=0;k<nnsp_size;k++) {
1677     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1678   }
1679   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1680 
1681   /* set quantities in pcbddc data structure and store previous primal size */
1682   /* n_vertices defines the number of subdomain corners in the primal space */
1683   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1684   olocal_primal_size = pcbddc->local_primal_size;
1685   pcbddc->local_primal_size = total_counts;
1686   pcbddc->n_vertices = n_vertices;
1687   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1688 
1689   /* Create constraint matrix */
1690   /* The constraint matrix is used to compute the l2g map of primal dofs */
1691   /* so we need to set it up properly either with or without change of basis */
1692   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1693   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1694   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1695   /* array to compute a local numbering of constraints : vertices first then constraints */
1696   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1697   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1698   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1699   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1700   /* auxiliary stuff for basis change */
1701   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1702   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1703 
1704   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1705   total_primal_vertices=0;
1706   for (i=0;i<pcbddc->local_primal_size;i++) {
1707     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1708     if (size_of_constraint == 1) {
1709       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1710       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1711       aux_primal_minloc[total_primal_vertices]=0;
1712       total_primal_vertices++;
1713     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1714       PetscInt min_loc,min_index;
1715       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1716       /* find first untouched local node */
1717       k = 0;
1718       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1719       min_index = global_indices[k];
1720       min_loc = k;
1721       /* search the minimum among global nodes already untouched on the cc */
1722       for (k=1;k<size_of_constraint;k++) {
1723         /* there can be more than one constraint on a single connected component */
1724         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1725           min_index = global_indices[k];
1726           min_loc = k;
1727         }
1728       }
1729       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1730       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1731       aux_primal_minloc[total_primal_vertices]=min_loc;
1732       total_primal_vertices++;
1733     }
1734   }
1735   /* determine if a QR strategy is needed for change of basis */
1736   qr_needed = PETSC_FALSE;
1737   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1738   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1739     if (PetscBTLookup(change_basis,i)) {
1740       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1741       j = 0;
1742       for (k=0;k<size_of_constraint;k++) {
1743         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1744           j++;
1745         }
1746       }
1747       /* found more than one primal dof on the cc */
1748       if (j > 1) {
1749         PetscBTSet(qr_needed_idx,i);
1750         qr_needed = PETSC_TRUE;
1751       }
1752     }
1753   }
1754   /* free workspace */
1755   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1756 
1757   /* permute indices in order to have a sorted set of vertices */
1758   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1759 
1760   /* nonzero structure of constraint matrix */
1761   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1762   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1763   j=total_primal_vertices;
1764   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1765     if (!PetscBTLookup(change_basis,i)) {
1766       nnz[j]=temp_indices[i+1]-temp_indices[i];
1767       j++;
1768     }
1769   }
1770   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1771   ierr = PetscFree(nnz);CHKERRQ(ierr);
1772   /* set values in constraint matrix */
1773   for (i=0;i<total_primal_vertices;i++) {
1774     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1775   }
1776   total_counts = total_primal_vertices;
1777   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1778     if (!PetscBTLookup(change_basis,i)) {
1779       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1780       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1781       total_counts++;
1782     }
1783   }
1784   /* assembling */
1785   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1786   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1787   /*
1788   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1789   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1790   */
1791   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1792   if (pcbddc->use_change_of_basis) {
1793     /* dual and primal dofs on a single cc */
1794     PetscInt     dual_dofs,primal_dofs;
1795     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1796     PetscInt     primal_counter;
1797     /* working stuff for GEQRF */
1798     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1799     PetscBLASInt lqr_work;
1800     /* working stuff for UNGQR */
1801     PetscScalar  *gqr_work,lgqr_work_t;
1802     PetscBLASInt lgqr_work;
1803     /* working stuff for TRTRS */
1804     PetscScalar  *trs_rhs;
1805     PetscBLASInt Blas_NRHS;
1806     /* pointers for values insertion into change of basis matrix */
1807     PetscInt     *start_rows,*start_cols;
1808     PetscScalar  *start_vals;
1809     /* working stuff for values insertion */
1810     PetscBT      is_primal;
1811 
1812     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1813     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1814     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1815     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1816     /* work arrays */
1817     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1818     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1819     /* nonzeros per row */
1820     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1821       if (PetscBTLookup(change_basis,i)) {
1822         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1823         if (PetscBTLookup(qr_needed_idx,i)) {
1824           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1825         } else {
1826           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1827           /* get local primal index on the cc */
1828           j = 0;
1829           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1830           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1831         }
1832       }
1833     }
1834     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1835     ierr = PetscFree(nnz);CHKERRQ(ierr);
1836     /* Set initial identity in the matrix */
1837     for (i=0;i<pcis->n_B;i++) {
1838       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1839     }
1840 
1841     if (pcbddc->dbg_flag) {
1842       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1843       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1844     }
1845 
1846 
1847     /* Now we loop on the constraints which need a change of basis */
1848     /*
1849        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1850        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1851 
1852        Basic blocks of change of basis matrix T computed
1853 
1854           - Using the following block transformation if there is only a primal dof on the cc
1855             (in the example, primal dof is the last one of the edge in LOCAL ordering
1856              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1857             | 1        0   ...        0     1 |
1858             | 0        1   ...        0     1 |
1859             |              ...                |
1860             | 0        ...            1     1 |
1861             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1862 
1863           - via QR decomposition of constraints otherwise
1864     */
1865     if (qr_needed) {
1866       /* space to store Q */
1867       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1868       /* first we issue queries for optimal work */
1869       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1870       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1871       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1872       lqr_work = -1;
1873       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1874       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1875       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1876       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1877       lgqr_work = -1;
1878       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1879       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1880       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1881       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1882       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1883       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1884       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1885       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1886       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1887       /* array to store scaling factors for reflectors */
1888       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1889       /* array to store rhs and solution of triangular solver */
1890       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1891       /* allocating workspace for check */
1892       if (pcbddc->dbg_flag) {
1893         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1894       }
1895     }
1896     /* array to store whether a node is primal or not */
1897     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1898     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1899     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1900     if (i != total_primal_vertices) {
1901       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1902     }
1903     for (i=0;i<total_primal_vertices;i++) {
1904       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1905     }
1906     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1907 
1908     /* loop on constraints and see whether or not they need a change of basis and compute it */
1909     /* -> using implicit ordering contained in temp_indices data */
1910     total_counts = pcbddc->n_vertices;
1911     primal_counter = total_counts;
1912     while (total_counts<pcbddc->local_primal_size) {
1913       primal_dofs = 1;
1914       if (PetscBTLookup(change_basis,total_counts)) {
1915         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1916         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1917           primal_dofs++;
1918         }
1919         /* get constraint info */
1920         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1921         dual_dofs = size_of_constraint-primal_dofs;
1922 
1923         if (pcbddc->dbg_flag) {
1924           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1925         }
1926 
1927         if (primal_dofs > 1) { /* QR */
1928 
1929           /* copy quadrature constraints for change of basis check */
1930           if (pcbddc->dbg_flag) {
1931             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1932           }
1933           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1934           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1935 
1936           /* compute QR decomposition of constraints */
1937           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1938           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1939           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1940           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1941           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1942           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1943           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1944 
1945           /* explictly compute R^-T */
1946           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1947           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1948           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1949           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1950           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1951           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1952           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1953           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1954           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1955           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1956 
1957           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1958           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1959           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1960           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1961           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1962           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1963           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1964           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1965           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1966 
1967           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1968              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1969              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1970           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1971           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1975           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1976           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1977           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1978           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1980 
1981           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1982           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1983           /* insert cols for primal dofs */
1984           for (j=0;j<primal_dofs;j++) {
1985             start_vals = &qr_basis[j*size_of_constraint];
1986             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1987             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1988           }
1989           /* insert cols for dual dofs */
1990           for (j=0,k=0;j<dual_dofs;k++) {
1991             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1992               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1993               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1994               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1995               j++;
1996             }
1997           }
1998 
1999           /* check change of basis */
2000           if (pcbddc->dbg_flag) {
2001             PetscInt   ii,jj;
2002             PetscBool valid_qr=PETSC_TRUE;
2003             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2004             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2005             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2006             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2007             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2008             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2009             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2010             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2011             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2012             for (jj=0;jj<size_of_constraint;jj++) {
2013               for (ii=0;ii<primal_dofs;ii++) {
2014                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2015                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2016               }
2017             }
2018             if (!valid_qr) {
2019               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2020               for (jj=0;jj<size_of_constraint;jj++) {
2021                 for (ii=0;ii<primal_dofs;ii++) {
2022                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2023                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2024                   }
2025                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2026                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2027                   }
2028                 }
2029               }
2030             } else {
2031               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2032             }
2033           }
2034         } else { /* simple transformation block */
2035           PetscInt row,col;
2036           PetscScalar val;
2037           for (j=0;j<size_of_constraint;j++) {
2038             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2039             if (!PetscBTLookup(is_primal,row)) {
2040               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2041               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2042               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2043             } else {
2044               for (k=0;k<size_of_constraint;k++) {
2045                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2046                 if (row != col) {
2047                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2048                 } else {
2049                   val = 1.0;
2050                 }
2051                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2052               }
2053             }
2054           }
2055         }
2056         /* increment primal counter */
2057         primal_counter += primal_dofs;
2058       } else {
2059         if (pcbddc->dbg_flag) {
2060           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2061         }
2062       }
2063       /* increment constraint counter total_counts */
2064       total_counts += primal_dofs;
2065     }
2066 
2067     /* free workspace */
2068     if (qr_needed) {
2069       if (pcbddc->dbg_flag) {
2070         ierr = PetscFree(work);CHKERRQ(ierr);
2071       }
2072       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2073       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2074       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2075       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2076       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2077     }
2078     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2079     /* assembling */
2080     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082     /*
2083     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2084     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2085     */
2086   }
2087 
2088   /* get indices in local ordering for vertices and constraints */
2089   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2090     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2091     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2092   }
2093   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2094   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2095   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2096   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2097   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2098   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2099   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2100   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2101   /* set quantities in PCBDDC data struct */
2102   pcbddc->n_actual_vertices = i;
2103   /* check if a new primal space has been introduced */
2104   pcbddc->new_primal_space_local = PETSC_TRUE;
2105   if (olocal_primal_size == pcbddc->local_primal_size) {
2106     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2107     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2108     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2109   }
2110   /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */
2111   if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE;
2112   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2113   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2114 
2115   /* flush dbg viewer */
2116   if (pcbddc->dbg_flag) {
2117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2118   }
2119 
2120   /* free workspace */
2121   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2122   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2123   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2124   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2125   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2126   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2127   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2128   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2134 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2135 {
2136   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2137   PC_IS       *pcis = (PC_IS*)pc->data;
2138   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2139   PetscInt    ierr,i,vertex_size;
2140   PetscViewer viewer=pcbddc->dbg_viewer;
2141 
2142   PetscFunctionBegin;
2143   /* Reset previously computed graph */
2144   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2145   /* Init local Graph struct */
2146   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2147 
2148   /* Check validity of the csr graph passed in by the user */
2149   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2150     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2151   }
2152 
2153   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2154   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2155     Mat mat_adj;
2156     const PetscInt *xadj,*adjncy;
2157     PetscBool flg_row=PETSC_TRUE;
2158 
2159     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2160     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2161     if (!flg_row) {
2162       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2163     }
2164     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2165     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2166     if (!flg_row) {
2167       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2168     }
2169     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2170   }
2171 
2172   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2173   vertex_size = 1;
2174   if (pcbddc->user_provided_isfordofs) {
2175     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2176       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2177       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2178         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2179         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2180       }
2181       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2182       pcbddc->n_ISForDofs = 0;
2183       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2184     }
2185     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2186     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2187   } else {
2188     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2189       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2190       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2191       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2192         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2193       }
2194     }
2195   }
2196 
2197   /* Setup of Graph */
2198   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2199     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2200   }
2201   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2202     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2203   }
2204   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2205 
2206   /* Graph's connected components analysis */
2207   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2208 
2209   /* print some info to stdout */
2210   if (pcbddc->dbg_flag) {
2211     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2212   }
2213 
2214   /* mark topography has done */
2215   pcbddc->recompute_topography = PETSC_FALSE;
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2221 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2222 {
2223   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2224   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   n = 0;
2229   vertices = 0;
2230   if (pcbddc->ConstraintMatrix) {
2231     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2232     for (i=0;i<local_primal_size;i++) {
2233       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2234       if (size_of_constraint == 1) n++;
2235       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2236     }
2237     if (vertices_idx) {
2238       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2239       n = 0;
2240       for (i=0;i<local_primal_size;i++) {
2241         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2242         if (size_of_constraint == 1) {
2243           vertices[n++]=row_cmat_indices[0];
2244         }
2245         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2246       }
2247     }
2248   }
2249   *n_vertices = n;
2250   if (vertices_idx) *vertices_idx = vertices;
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2256 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2257 {
2258   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2259   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2260   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2261   PetscBT        touched;
2262   PetscErrorCode ierr;
2263 
2264     /* This function assumes that the number of local constraints per connected component
2265        is not greater than the number of nodes defined for the connected component
2266        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2267   PetscFunctionBegin;
2268   n = 0;
2269   constraints_index = 0;
2270   if (pcbddc->ConstraintMatrix) {
2271     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2272     max_size_of_constraint = 0;
2273     for (i=0;i<local_primal_size;i++) {
2274       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2275       if (size_of_constraint > 1) {
2276         n++;
2277       }
2278       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2279       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2280     }
2281     if (constraints_idx) {
2282       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2283       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2284       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2285       n = 0;
2286       for (i=0;i<local_primal_size;i++) {
2287         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2288         if (size_of_constraint > 1) {
2289           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2290           /* find first untouched local node */
2291           j = 0;
2292           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2293           min_index = row_cmat_global_indices[j];
2294           min_loc = j;
2295           /* search the minimum among nodes not yet touched on the connected component
2296              since there can be more than one constraint on a single cc */
2297           for (j=1;j<size_of_constraint;j++) {
2298             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2299               min_index = row_cmat_global_indices[j];
2300               min_loc = j;
2301             }
2302           }
2303           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2304           constraints_index[n++] = row_cmat_indices[min_loc];
2305         }
2306         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2307       }
2308       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2309       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2310     }
2311   }
2312   *n_constraints = n;
2313   if (constraints_idx) *constraints_idx = constraints_index;
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /* the next two functions has been adapted from pcis.c */
2318 #undef __FUNCT__
2319 #define __FUNCT__ "PCBDDCApplySchur"
2320 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2321 {
2322   PetscErrorCode ierr;
2323   PC_IS          *pcis = (PC_IS*)(pc->data);
2324 
2325   PetscFunctionBegin;
2326   if (!vec2_B) { vec2_B = v; }
2327   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2328   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2329   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2330   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2331   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2337 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2338 {
2339   PetscErrorCode ierr;
2340   PC_IS          *pcis = (PC_IS*)(pc->data);
2341 
2342   PetscFunctionBegin;
2343   if (!vec2_B) { vec2_B = v; }
2344   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2345   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2346   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2347   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2348   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "PCBDDCSubsetNumbering"
2354 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2355 {
2356   Vec            local_vec,global_vec;
2357   IS             seqis,paris;
2358   VecScatter     scatter_ctx;
2359   PetscScalar    *array;
2360   PetscInt       *temp_global_dofs;
2361   PetscScalar    globalsum;
2362   PetscInt       i,j,s;
2363   PetscInt       nlocals,first_index,old_index,max_local;
2364   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2365   PetscMPIInt    *dof_sizes,*dof_displs;
2366   PetscBool      first_found;
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   /* mpi buffers */
2371   MPI_Comm_size(comm,&size_prec_comm);
2372   MPI_Comm_rank(comm,&rank_prec_comm);
2373   j = ( !rank_prec_comm ? size_prec_comm : 0);
2374   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2375   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2376   /* get maximum size of subset */
2377   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2378   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2379   max_local = 0;
2380   if (n_local_dofs) {
2381     max_local = temp_global_dofs[0];
2382     for (i=1;i<n_local_dofs;i++) {
2383       if (max_local < temp_global_dofs[i] ) {
2384         max_local = temp_global_dofs[i];
2385       }
2386     }
2387   }
2388   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2389   max_global++;
2390   max_local = 0;
2391   if (n_local_dofs) {
2392     max_local = local_dofs[0];
2393     for (i=1;i<n_local_dofs;i++) {
2394       if (max_local < local_dofs[i] ) {
2395         max_local = local_dofs[i];
2396       }
2397     }
2398   }
2399   max_local++;
2400   /* allocate workspace */
2401   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2402   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2403   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2404   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2405   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2406   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2407   /* create scatter */
2408   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2409   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2410   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2411   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2412   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2413   /* init array */
2414   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2415   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2416   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2417   if (local_dofs_mult) {
2418     for (i=0;i<n_local_dofs;i++) {
2419       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2420     }
2421   } else {
2422     for (i=0;i<n_local_dofs;i++) {
2423       array[local_dofs[i]]=1.0;
2424     }
2425   }
2426   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2427   /* scatter into global vec and get total number of global dofs */
2428   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2431   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2432   /* Fill global_vec with cumulative function for global numbering */
2433   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2434   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2435   nlocals = 0;
2436   first_index = -1;
2437   first_found = PETSC_FALSE;
2438   for (i=0;i<s;i++) {
2439     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2440       first_found = PETSC_TRUE;
2441       first_index = i;
2442     }
2443     nlocals += (PetscInt)PetscRealPart(array[i]);
2444   }
2445   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2446   if (!rank_prec_comm) {
2447     dof_displs[0]=0;
2448     for (i=1;i<size_prec_comm;i++) {
2449       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2450     }
2451   }
2452   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2453   if (first_found) {
2454     array[first_index] += (PetscScalar)nlocals;
2455     old_index = first_index;
2456     for (i=first_index+1;i<s;i++) {
2457       if (PetscRealPart(array[i]) > 0.0) {
2458         array[i] += array[old_index];
2459         old_index = i;
2460       }
2461     }
2462   }
2463   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2464   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2465   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2467   /* get global ordering of local dofs */
2468   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2469   if (local_dofs_mult) {
2470     for (i=0;i<n_local_dofs;i++) {
2471       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2472     }
2473   } else {
2474     for (i=0;i<n_local_dofs;i++) {
2475       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2476     }
2477   }
2478   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2479   /* free workspace */
2480   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2481   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2482   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2483   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2484   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2485   /* return pointer to global ordering of local dofs */
2486   *global_numbering_subset = temp_global_dofs;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2492 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2493 {
2494   PetscInt       i,j;
2495   PetscScalar    *alphas;
2496   PetscErrorCode ierr;
2497 
2498   PetscFunctionBegin;
2499   /* this implements stabilized Gram-Schmidt */
2500   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2501   for (i=0;i<n;i++) {
2502     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2503     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2504     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2505   }
2506   ierr = PetscFree(alphas);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 /* TODO
2511    - now preallocation is done assuming SEQDENSE local matrices
2512 */
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatISGetMPIXAIJ"
2515 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2516 {
2517   Mat                    new_mat;
2518   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2519   /* info on mat */
2520   /* ISLocalToGlobalMapping rmapping,cmapping; */
2521   PetscInt               bs,rows,cols;
2522   PetscInt               lrows,lcols;
2523   PetscInt               local_rows,local_cols;
2524   PetscBool              isdense;
2525   /* values insertion */
2526   PetscScalar            *array;
2527   PetscInt               *local_indices,*global_indices;
2528   /* work */
2529   PetscInt               i,j,index_row;
2530   PetscErrorCode         ierr;
2531 
2532   PetscFunctionBegin;
2533   /* MISSING CHECKS
2534     - rectangular case not covered (it is not allowed by MATIS)
2535   */
2536   /* get info from mat */
2537   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2538   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2539   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2540   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2541 
2542   /* work */
2543   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2544   for (i=0;i<local_rows;i++) local_indices[i]=i;
2545   /* map indices of local mat to global values */
2546   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2547   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2548   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2549 
2550   if (reuse==MAT_INITIAL_MATRIX) {
2551     Vec         vec_dnz,vec_onz;
2552     PetscScalar *my_dnz,*my_onz;
2553     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2554     PetscInt    index_col,owner;
2555     PetscMPIInt nsubdomains;
2556 
2557     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2558     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2559     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2560     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2561     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2562     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2563     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2564 
2565     /*
2566       preallocation
2567     */
2568 
2569     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2570     /*
2571        Some vectors are needed to sum up properly on shared interface dofs.
2572        Preallocation macros cannot do the job.
2573        Note that preallocation is not exact, since it overestimates nonzeros
2574     */
2575     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2576     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2577     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2578     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2579     /* All processes need to compute entire row ownership */
2580     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2581     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2582     for (i=0;i<nsubdomains;i++) {
2583       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2584         row_ownership[j]=i;
2585       }
2586     }
2587 
2588     /*
2589        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2590        then, they will be summed up properly. This way, preallocation is always sufficient
2591     */
2592     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2593     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2594     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2595     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2596     for (i=0;i<local_rows;i++) {
2597       index_row = global_indices[i];
2598       for (j=i;j<local_rows;j++) {
2599         owner = row_ownership[index_row];
2600         index_col = global_indices[j];
2601         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2602           my_dnz[i] += 1.0;
2603         } else { /* offdiag block */
2604           my_onz[i] += 1.0;
2605         }
2606         /* same as before, interchanging rows and cols */
2607         if (i != j) {
2608           owner = row_ownership[index_col];
2609           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2610             my_dnz[j] += 1.0;
2611           } else {
2612             my_onz[j] += 1.0;
2613           }
2614         }
2615       }
2616     }
2617     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2618     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2619     if (local_rows) { /* multilevel guard */
2620       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2621       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2622     }
2623     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2624     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2625     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2626     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2627     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2628     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2629     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2630 
2631     /* set computed preallocation in dnz and onz */
2632     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2633     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2634     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2635     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2636     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2637     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2638     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2639     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2640 
2641     /* Resize preallocation if overestimated */
2642     for (i=0;i<lrows;i++) {
2643       dnz[i] = PetscMin(dnz[i],lcols);
2644       onz[i] = PetscMin(onz[i],cols-lcols);
2645     }
2646     /* set preallocation */
2647     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2648     for (i=0;i<lrows/bs;i++) {
2649       dnz[i] = dnz[i*bs]/bs;
2650       onz[i] = onz[i*bs]/bs;
2651     }
2652     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2653     for (i=0;i<lrows/bs;i++) {
2654       dnz[i] = dnz[i]-i;
2655     }
2656     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2657     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2658     *M = new_mat;
2659   } else {
2660     PetscInt mbs,mrows,mcols;
2661     /* some checks */
2662     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2663     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2664     if (mrows != rows) {
2665       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2666     }
2667     if (mrows != rows) {
2668       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2669     }
2670     if (mbs != bs) {
2671       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2672     }
2673     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2674   }
2675   /* set local to global mappings */
2676   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2677   /* Set values */
2678   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2679   if (isdense) { /* special case for dense local matrices */
2680     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2681     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2682     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2683     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2684     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2685     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2686   } else { /* very basic values insertion for all other matrix types */
2687     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2688     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2689     for (i=0;i<local_rows;i++) {
2690       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2691       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2692       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2693       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2694       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2695       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2696     }
2697   }
2698   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2700   if (isdense) {
2701     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatISGetSubassemblingPattern"
2708 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2709 {
2710   Mat             subdomain_adj;
2711   IS              new_ranks,ranks_send_to;
2712   MatPartitioning partitioner;
2713   Mat_IS          *matis;
2714   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2715   PetscInt        prank;
2716   PetscMPIInt     size,rank,color;
2717   PetscInt        *xadj,*adjncy,*oldranks;
2718   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2719   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2720   PetscErrorCode  ierr;
2721   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2722   PetscSubcomm    subcomm;
2723 
2724   PetscFunctionBegin;
2725   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2726   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2727   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2728 
2729   /* Get info on mapping */
2730   matis = (Mat_IS*)(mat->data);
2731   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2732   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2733 
2734   /* build local CSR graph of subdomains' connectivity */
2735   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2736   xadj[0] = 0;
2737   xadj[1] = PetscMax(n_neighs-1,0);
2738   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2739   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2740 
2741   if (threshold) {
2742     PetscInt* count,min_threshold;
2743     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2744     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2745     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2746       for (j=0;j<n_shared[i];j++) {
2747         count[shared[i][j]] += 1;
2748       }
2749     }
2750     /* adapt threshold since we dont want to lose significant connections */
2751     min_threshold = n_neighs;
2752     for (i=1;i<n_neighs;i++) {
2753       for (j=0;j<n_shared[i];j++) {
2754         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2755       }
2756     }
2757     threshold = PetscMax(min_threshold+1,threshold);
2758     xadj[1] = 0;
2759     for (i=1;i<n_neighs;i++) {
2760       for (j=0;j<n_shared[i];j++) {
2761         if (count[shared[i][j]] < threshold) {
2762           adjncy[xadj[1]] = neighs[i];
2763           adjncy_wgt[xadj[1]] = n_shared[i];
2764           xadj[1]++;
2765           break;
2766         }
2767       }
2768     }
2769     ierr = PetscFree(count);CHKERRQ(ierr);
2770   } else {
2771     if (xadj[1]) {
2772       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2773       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2774     }
2775   }
2776   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2777   if (use_square) {
2778     for (i=0;i<xadj[1];i++) {
2779       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2780     }
2781   }
2782   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2783 
2784   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2785 
2786   /*
2787     Restrict work on active processes only.
2788   */
2789   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2790   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2791   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2792   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2793   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2794   if (color) {
2795     ierr = PetscFree(xadj);CHKERRQ(ierr);
2796     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2797     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2798   } else {
2799     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2800     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2801     prank = rank;
2802     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2803     /*
2804     for (i=0;i<size;i++) {
2805       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2806     }
2807     */
2808     for (i=0;i<xadj[1];i++) {
2809       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2810     }
2811     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2812     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2813     n_subdomains = (PetscInt)size;
2814     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2815 
2816     /* Partition */
2817     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2818     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2819     if (use_vwgt) {
2820       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2821       v_wgt[0] = local_size;
2822       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2823     }
2824     /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */
2825     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2826     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2827     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2828     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2829 
2830     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2831     /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */
2832     ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2833     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2834     /* clean up */
2835     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2836     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2837     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2838     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2839   }
2840   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2841 
2842   /* assemble parallel IS for sends */
2843   i = 1;
2844   if (color) i=0;
2845   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2846   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2847 
2848   /* get back IS */
2849   *is_sends = ranks_send_to;
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatISSubassemble"
2857 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n)
2858 {
2859   Mat                    local_mat;
2860   Mat_IS                 *matis;
2861   IS                     is_sends_internal;
2862   PetscInt               rows,cols;
2863   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2864   PetscBool              ismatis,isdense;
2865   ISLocalToGlobalMapping l2gmap;
2866   PetscInt*              l2gmap_indices;
2867   const PetscInt*        is_indices;
2868   MatType                new_local_type;
2869   MatTypePrivate         new_local_type_private;
2870   /* buffers */
2871   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2872   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2873   /* MPI */
2874   MPI_Comm               comm;
2875   PetscMPIInt            n_sends,n_recvs,commsize;
2876   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2877   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2878   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2879   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2880   PetscErrorCode         ierr;
2881 
2882   PetscFunctionBegin;
2883   /* checks */
2884   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2885   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2886   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2887   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2888   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2889   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2890   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2891   if (reuse == MAT_REUSE_MATRIX) {
2892     PetscInt mrows,mcols,mnrows,mncols;
2893     PetscBool ismatis;
2894     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2895     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2896     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2897     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2898     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2899     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2900   }
2901   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2902   PetscValidLogicalCollectiveInt(mat,bs,0);
2903   /* prepare IS for sending if not provided */
2904   if (!is_sends) {
2905     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2906     ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2907   } else {
2908     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2909     is_sends_internal = is_sends;
2910   }
2911 
2912   /* get pointer of MATIS data */
2913   matis = (Mat_IS*)mat->data;
2914 
2915   /* get comm */
2916   comm = PetscObjectComm((PetscObject)mat);
2917 
2918   /* compute number of sends */
2919   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2920   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2921 
2922   /* compute number of receives */
2923   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2924   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2925   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2926   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2927   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2928   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2929   ierr = PetscFree(iflags);CHKERRQ(ierr);
2930 
2931   /* prepare send/receive buffers */
2932   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2933   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2934   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2935   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2936 
2937   /* Get data from local mat */
2938   if (!isdense) {
2939     /* TODO: See below some guidelines on how to prepare the local buffers */
2940     /*
2941        send_buffer_vals should contain the raw values of the local matrix
2942        send_buffer_idxs should contain:
2943        - MatType_PRIVATE type
2944        - PetscInt        size_of_l2gmap
2945        - PetscInt        global_row_indices[size_of_l2gmap]
2946        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2947     */
2948   } else {
2949     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2950     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2951     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2952     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2953     send_buffer_idxs[1] = i;
2954     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2955     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2956     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2957     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2958     for (i=0;i<n_sends;i++) {
2959       ilengths_vals[is_indices[i]] = len*len;
2960       ilengths_idxs[is_indices[i]] = len+2;
2961     }
2962   }
2963   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2964   buf_size_idxs = 0;
2965   buf_size_vals = 0;
2966   for (i=0;i<n_recvs;i++) {
2967     buf_size_idxs += (PetscInt)olengths_idxs[i];
2968     buf_size_vals += (PetscInt)olengths_vals[i];
2969   }
2970   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2971   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2972 
2973   /* get new tags for clean communications */
2974   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2975   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2976 
2977   /* allocate for requests */
2978   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2979   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2980   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2981   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2982 
2983   /* communications */
2984   ptr_idxs = recv_buffer_idxs;
2985   ptr_vals = recv_buffer_vals;
2986   for (i=0;i<n_recvs;i++) {
2987     source_dest = onodes[i];
2988     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2989     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2990     ptr_idxs += olengths_idxs[i];
2991     ptr_vals += olengths_vals[i];
2992   }
2993   for (i=0;i<n_sends;i++) {
2994     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2995     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2996     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2997   }
2998   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2999   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3000 
3001   /* assemble new l2g map */
3002   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3003   ptr_idxs = recv_buffer_idxs;
3004   buf_size_idxs = 0;
3005   for (i=0;i<n_recvs;i++) {
3006     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3007     ptr_idxs += olengths_idxs[i];
3008   }
3009   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3010   ptr_idxs = recv_buffer_idxs;
3011   buf_size_idxs = 0;
3012   for (i=0;i<n_recvs;i++) {
3013     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3014     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3015     ptr_idxs += olengths_idxs[i];
3016   }
3017   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3018   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3019   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3020 
3021   /* infer new local matrix type from received local matrices type */
3022   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3023   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3024   new_local_type_private = MATAIJ_PRIVATE;
3025   new_local_type = MATSEQAIJ;
3026   if (n_recvs) {
3027     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3028     ptr_idxs = recv_buffer_idxs;
3029     for (i=0;i<n_recvs;i++) {
3030       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3031         new_local_type_private = MATAIJ_PRIVATE;
3032         break;
3033       }
3034       ptr_idxs += olengths_idxs[i];
3035     }
3036     switch (new_local_type_private) {
3037       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3038         new_local_type = MATSEQAIJ;
3039         bs = 1;
3040         break;
3041       case MATAIJ_PRIVATE:
3042         new_local_type = MATSEQAIJ;
3043         bs = 1;
3044         break;
3045       case MATBAIJ_PRIVATE:
3046         new_local_type = MATSEQBAIJ;
3047         break;
3048       case MATSBAIJ_PRIVATE:
3049         new_local_type = MATSEQSBAIJ;
3050         break;
3051       default:
3052         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3053         break;
3054     }
3055   }
3056 
3057   /* create MATIS object if needed */
3058   if (reuse == MAT_INITIAL_MATRIX) {
3059     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3060     ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3061   } else {
3062     /* it also destroys the local matrices */
3063     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3064   }
3065   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3066   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3067   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3068   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3069 
3070   /* set values */
3071   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3072   ptr_vals = recv_buffer_vals;
3073   ptr_idxs = recv_buffer_idxs;
3074   for (i=0;i<n_recvs;i++) {
3075     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3076       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3077       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3078       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3079       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3080       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3081     }
3082     ptr_idxs += olengths_idxs[i];
3083     ptr_vals += olengths_vals[i];
3084   }
3085   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3086   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3087   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089 
3090   { /* check */
3091     Vec       lvec,rvec;
3092     PetscReal infty_error;
3093 
3094     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3095     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3096     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3097     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3098     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3099     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3100     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3101     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3102     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3103   }
3104 
3105   /* free workspace */
3106   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3107   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3108   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3109   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3110   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3111   if (isdense) {
3112     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3113     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3114   } else {
3115     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3116   }
3117   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3118   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3119   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3120   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3121   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3122   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3123   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3124   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3125   ierr = PetscFree(onodes);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 #undef __FUNCT__
3130 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3131 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3132 {
3133   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3134   PC_IS                  *pcis = (PC_IS*)pc->data;
3135   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3136   MatNullSpace           CoarseNullSpace=NULL;
3137   ISLocalToGlobalMapping coarse_islg;
3138   IS                     coarse_is;
3139   PetscInt               max_it;
3140   PetscInt               im_active=-1,active_procs=-1;
3141   PC                     pc_temp;
3142   PCType                 coarse_pc_type;
3143   KSPType                coarse_ksp_type;
3144   PetscBool              multilevel_requested,multilevel_allowed;
3145   PetscBool              setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse;
3146   MatStructure           matstruct;
3147   PetscErrorCode         ierr;
3148 
3149   PetscFunctionBegin;
3150   /* Assign global numbering to coarse dofs */
3151   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3152     PetscInt ocoarse_size;
3153     ocoarse_size = pcbddc->coarse_size;
3154     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3155     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3156     /* see if we can avoid some work */
3157     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3158       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3159         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3160         coarse_reuse = PETSC_FALSE;
3161       } else { /* we can safely reuse already computed coarse matrix */
3162         coarse_reuse = PETSC_TRUE;
3163       }
3164     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3165       coarse_reuse = PETSC_FALSE;
3166     }
3167     /* reset any subassembling information */
3168     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3169   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3170     coarse_reuse = PETSC_TRUE;
3171   }
3172 
3173   /* test if we can go multilevel */
3174   multilevel_allowed = PETSC_FALSE;
3175   multilevel_requested = PETSC_FALSE;
3176   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3177   if (multilevel_requested) {
3178     /* count "active processes" */
3179     im_active = !!(pcis->n);
3180     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3181     if (active_procs/pcbddc->coarsening_ratio < 2) {
3182       multilevel_allowed = PETSC_FALSE;
3183     } else {
3184       multilevel_allowed = PETSC_TRUE;
3185     }
3186   }
3187 
3188   /* set defaults for coarse KSP and PC */
3189   if (multilevel_allowed) {
3190     coarse_ksp_type = KSPRICHARDSON;
3191     coarse_pc_type = PCBDDC;
3192   } else {
3193     coarse_ksp_type = KSPPREONLY;
3194     coarse_pc_type = PCREDUNDANT;
3195   }
3196 
3197   /* create the coarse KSP object only once with defaults */
3198   if (!pcbddc->coarse_ksp) {
3199     char prefix[256],str_level[3];
3200     size_t len;
3201     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3202     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3203     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3204     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3205     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3206     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3207     /* prefix */
3208     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3209     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3210     if (!pcbddc->current_level) {
3211       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3212       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3213     } else {
3214       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3215       if (pcbddc->current_level>1) len -= 2;
3216       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3217       *(prefix+len)='\0';
3218       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3219       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3220     }
3221     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3222   }
3223   /* allow user customization */
3224   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3225   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3226   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3227 
3228   /* get some info after set from options */
3229   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3230   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3231   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3232   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3233     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3234     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3235     isbddc = PETSC_FALSE;
3236   }
3237 
3238   /* creates temporary l2gmap and IS for coarse indexes */
3239   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3240   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3241 
3242   /* propagate BDDC info to the next level */
3243   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3244   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3245   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3246   if (isbddc) { /* protects from unneded computations */
3247     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3248     const PetscInt         *idxs;
3249     IS                     *c_isfordofs,c_isneumann;
3250     ISLocalToGlobalMapping tmap;
3251 
3252     /* create map between primal indices (in local representative ordering) and local subdomain numbering */
3253     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3254     /* allocate space for temporary storage */
3255     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3256     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3257     /* dofs splitting */
3258     ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr);
3259     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3260       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3261       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3262       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3263       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3264       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3265       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3266       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr);
3267       /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */
3268     }
3269     ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr);
3270     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3271       ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr);
3272     }
3273     ierr = PetscFree(c_isfordofs);CHKERRQ(ierr);
3274     /* neumann boundaries */
3275     if (pcbddc->NeumannBoundariesLocal) {
3276       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3277       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3278       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3279       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3280       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3281       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3282       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr);
3283       /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */
3284       ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr);
3285     }
3286     ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr);
3287     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3288     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3289     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3290   }
3291 
3292   /* creates temporary MATIS object for coarse matrix */
3293   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3294   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3295   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3296   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3299   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3300 
3301   /* assemble coarse matrix */
3302   if (isbddc || isnn) {
3303     if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3304       ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3305     }
3306     if (coarse_reuse) {
3307       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3308       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3309       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3310     } else {
3311       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3312     }
3313   } else {
3314     if (coarse_reuse) {
3315       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3316       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3317       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3318     } else {
3319       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3320     }
3321   }
3322   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3323 
3324   /* create local to global scatters for coarse problem */
3325   if (pcbddc->new_primal_space) {
3326     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3327     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3328     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3329     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3330     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3331   }
3332   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3333 
3334   /* propagate symmetry info to coarse matrix */
3335   issym = PETSC_FALSE;
3336   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3337   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3338   isherm = PETSC_FALSE;
3339   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3340   ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3341 
3342   /* Compute coarse null space (special handling by BDDC only) */
3343   if (pcbddc->NullSpace) {
3344     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3345     if (isbddc) {
3346       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3347     } else {
3348       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3349     }
3350   }
3351 
3352   /* set operators */
3353   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3354   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3355 
3356   /* additional KSP customization */
3357   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3358   if (max_it < 5) {
3359     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3360   }
3361 
3362   /* print some info if requested */
3363   if (pcbddc->dbg_flag) {
3364     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3365     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3366     if (!multilevel_allowed) {
3367       if (multilevel_requested) {
3368         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3369       } else if (pcbddc->max_levels) {
3370         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3371       }
3372     }
3373     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3374     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3375   }
3376 
3377   /* setup coarse ksp */
3378   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3379 
3380   /* Check coarse problem if in debug mode or if solving with an iterative method */
3381   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3382   if (pcbddc->dbg_flag || !ispreonly) {
3383     KSP       check_ksp;
3384     KSPType   check_ksp_type;
3385     PC        check_pc;
3386     Vec       check_vec;
3387     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3388     PetscInt  its;
3389     PetscBool compute;
3390 
3391     /* Create ksp object suitable for estimation of extreme eigenvalues */
3392     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3393     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3394     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3395     if (ispreonly) {
3396       check_ksp_type = KSPPREONLY;
3397       compute = PETSC_FALSE;
3398     } else {
3399       check_ksp_type = KSPGMRES;
3400       compute = PETSC_TRUE;
3401     }
3402     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3403     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3404     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3405     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3406     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3407     /* create random vec */
3408     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3409     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3410     if (CoarseNullSpace) {
3411       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3412     }
3413     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3414     /* solve coarse problem */
3415     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3416     if (CoarseNullSpace) {
3417       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3418     }
3419     /* set eigenvalue estimation if preonly has not been requested */
3420     if (!ispreonly) {
3421       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3422       /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */
3423       ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3424       ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3425     }
3426     /* check coarse problem residual error */
3427     if (pcbddc->dbg_flag) {
3428       ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3429       ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3430       ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3431       ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3432       ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3433       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3434       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3435       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3436       if (!ispreonly) {
3437         ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3438         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3439       }
3440       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3441     }
3442     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3443   }
3444 
3445   /* print additional info */
3446   if (pcbddc->dbg_flag) {
3447     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3448     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3449     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3450     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3451   }
3452 
3453   /* free memory */
3454   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3455   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3456   PetscFunctionReturn(0);
3457 }
3458 
3459 #undef __FUNCT__
3460 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3461 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3462 {
3463   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3464   PC_IS*         pcis = (PC_IS*)pc->data;
3465   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3466   PetscInt       i,coarse_size;
3467   PetscInt       *local_primal_indices;
3468   PetscErrorCode ierr;
3469 
3470   PetscFunctionBegin;
3471   /* Compute global number of coarse dofs */
3472   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3473     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3474   }
3475   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3476 
3477   /* check numbering */
3478   if (pcbddc->dbg_flag) {
3479     PetscScalar coarsesum,*array;
3480 
3481     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3482     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3483     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3484     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3485     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3486     for (i=0;i<pcbddc->local_primal_size;i++) {
3487       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3488     }
3489     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3490     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3491     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3492     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3493     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3494     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3495     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3496     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3497     for (i=0;i<pcis->n;i++) {
3498       if (array[i] == 1.0) {
3499         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3500       }
3501     }
3502     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3503     for (i=0;i<pcis->n;i++) {
3504       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3505     }
3506     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3507     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3508     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3509     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3510     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3511     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3512     if (pcbddc->dbg_flag > 1) {
3513       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3514       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3515       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3516       for (i=0;i<pcbddc->local_primal_size;i++) {
3517         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3518       }
3519       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3520     }
3521     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3522   }
3523   /* get back data */
3524   *coarse_size_n = coarse_size;
3525   *local_primal_indices_n = local_primal_indices;
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 #undef __FUNCT__
3530 #define __FUNCT__ "PCBDDCGlobalToLocal"
3531 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3532 {
3533   PC_IS*         pcis = (PC_IS*)pc->data;
3534   IS             localis_t;
3535   PetscInt       i,lsize,*idxs;
3536   PetscScalar    *vals;
3537   PetscErrorCode ierr;
3538 
3539   PetscFunctionBegin;
3540   /* get dirichlet indices in local ordering exploiting local to global map */
3541   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3542   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3543   for (i=0;i<lsize;i++) vals[i] = 1.0;
3544   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3545   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3546   if (idxs) { /* multilevel guard */
3547     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3548   }
3549   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3550   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3551   ierr = PetscFree(vals);CHKERRQ(ierr);
3552   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3553   /* now compute dirichlet set in local ordering */
3554   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3555   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3556   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3557   for (i=0,lsize=0;i<pcis->n;i++) {
3558     if (vals[i] == 1.0) {
3559       lsize++;
3560     }
3561   }
3562   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3563   for (i=0,lsize=0;i<pcis->n;i++) {
3564     if (vals[i] == 1.0) {
3565       idxs[lsize++] = i;
3566     }
3567   }
3568   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3569   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3570   *localis = localis_t;
3571   PetscFunctionReturn(0);
3572 }
3573