xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 8563f5aa3a7295b05487b4a345d2c15ab8bda340)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   PetscFunctionReturn(0);
111 }
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
115 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
116 {
117   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
118   PC_IS          *pcis = (PC_IS*)pc->data;
119   VecType        impVecType;
120   PetscInt       n_constraints,n_R,old_size;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   if (!pcbddc->ConstraintMatrix) {
125     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
126   }
127   /* get sizes */
128   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
129   n_R = pcis->n-pcbddc->n_actual_vertices;
130   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
131   /* local work vectors (try to avoid unneeded work)*/
132   /* R nodes */
133   old_size = -1;
134   if (pcbddc->vec1_R) {
135     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
136   }
137   if (n_R != old_size) {
138     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
139     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
140     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
141     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
142     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
143     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
144   }
145   /* local primal dofs */
146   old_size = -1;
147   if (pcbddc->vec1_P) {
148     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
149   }
150   if (pcbddc->local_primal_size != old_size) {
151     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
152     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
153     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
154     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
155   }
156   /* local explicit constraints */
157   old_size = -1;
158   if (pcbddc->vec1_C) {
159     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
160   }
161   if (n_constraints && n_constraints != old_size) {
162     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
163     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
164     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
165     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
166   }
167   PetscFunctionReturn(0);
168 }
169 
170 #undef __FUNCT__
171 #define __FUNCT__ "PCBDDCSetUpCorrection"
172 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
173 {
174   PetscErrorCode         ierr;
175   /* pointers to pcis and pcbddc */
176   PC_IS*                 pcis = (PC_IS*)pc->data;
177   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
178   /* submatrices of local problem */
179   Mat                    A_RV,A_VR,A_VV;
180   /* working matrices */
181   Mat                    M1,M2,M3,C_CR;
182   /* working vectors */
183   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
184   /* additional working stuff */
185   IS                     is_aux;
186   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
187   const PetscScalar      *array,*row_cmat_values;
188   const PetscInt         *row_cmat_indices,*idx_R_local;
189   PetscInt               *idx_V_B,*auxindices;
190   PetscInt               n_vertices,n_constraints,size_of_constraint;
191   PetscInt               i,j,n_R,n_D,n_B;
192   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
193   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
194   MatType                impMatType;
195   /* some shortcuts to scalars */
196   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
197   /* for debugging purposes */
198   PetscReal              *coarsefunctions_errors,*constraints_errors;
199 
200   PetscFunctionBegin;
201   /* get number of vertices (corners plus constraints with change of basis)
202      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
203   n_vertices = pcbddc->n_actual_vertices;
204   n_constraints = pcbddc->local_primal_size-n_vertices;
205   /* Set Non-overlapping dimensions */
206   n_B = pcis->n_B; n_D = pcis->n - n_B;
207   n_R = pcis->n-n_vertices;
208 
209   /* Set types for local objects needed by BDDC precondtioner */
210   impMatType = MATSEQDENSE;
211 
212   /* Allocating some extra storage just to be safe */
213   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
214   for (i=0;i<pcis->n;i++) auxindices[i]=i;
215 
216   /* vertices in boundary numbering */
217   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
218   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
219   if (i != n_vertices) {
220     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
221   }
222 
223   /* Precompute stuffs needed for preprocessing and application of BDDC*/
224   if (n_constraints) {
225     /* see if we can save some allocations */
226     if (pcbddc->local_auxmat2) {
227       PetscInt on_R,on_constraints;
228       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
229       if (on_R != n_R || on_constraints != n_constraints) {
230         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
231         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
232       }
233     }
234     /* work vectors */
235     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
236     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
237     /* auxiliary matrices */
238     if (!pcbddc->local_auxmat2) {
239       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
240       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
241       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
242       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
243     }
244 
245     /* Extract constraints on R nodes: C_{CR}  */
246     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
247     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
248     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
249 
250     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
251     for (i=0;i<n_constraints;i++) {
252       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
253       /* Get row of constraint matrix in R numbering */
254       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
255       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
256       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
257       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
258       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
259       /* Solve for row of constraint matrix in R numbering */
260       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
261       /* Set values in local_auxmat2 */
262       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
263       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
264       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
265     }
266     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
267     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
268     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
269 
270     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
271     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
272     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
273     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
274     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
275     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
276     ierr = MatSetUp(M1);CHKERRQ(ierr);
277     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
278     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
279     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
280     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
281     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
282     ierr = MatDestroy(&M2);CHKERRQ(ierr);
283     ierr = MatDestroy(&M3);CHKERRQ(ierr);
284     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
285     if (!pcbddc->local_auxmat1) {
286       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
287     } else {
288       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
289     }
290   }
291 
292   /* Get submatrices from subdomain matrix */
293   if (n_vertices) {
294     PetscInt ibs,mbs;
295     PetscBool issbaij;
296     Mat newmat;
297 
298     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
299     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
300     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
301     if (ibs != mbs) { /* need to convert to SEQAIJ */
302       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
303       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
304       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
306       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
307     } else {
308       /* this is safe */
309       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
310       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
311       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
312         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
313         /* which of the two approaches is faster? */
314         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
315         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
316         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
317         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
318         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
319       } else {
320         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
321         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
322       }
323     }
324     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
325     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
326     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
327   }
328 
329   /* Matrix of coarse basis functions (local) */
330   if (pcbddc->coarse_phi_B) {
331     PetscInt on_B,on_primal;
332     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
333     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
334       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
335       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
336     }
337   }
338   if (pcbddc->coarse_phi_D) {
339     PetscInt on_D,on_primal;
340     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
341     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
342       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
343       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
344     }
345   }
346   if (!pcbddc->coarse_phi_B) {
347     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
348     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
349     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
350     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
351   }
352   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
353     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
354     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
355     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
356     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
357   }
358 
359   if (pcbddc->dbg_flag) {
360     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
361     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
362     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
363   }
364   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
365   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
366 
367   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
368 
369   /* vertices */
370   for (i=0;i<n_vertices;i++) {
371     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
372     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
373     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
374     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
375     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
376     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
377     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
378     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
379     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
381     if (n_constraints) {
382       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
383       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
384       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
385     }
386     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
387     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
388 
389     /* Set values in coarse basis function and subdomain part of coarse_mat */
390     /* coarse basis functions */
391     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
392     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
393     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
394     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
395     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
396     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
397     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
398     if (pcbddc->switch_static || pcbddc->dbg_flag) {
399       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
400       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
402       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
403       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
404     }
405     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
406     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
407     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
408     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
409     if (n_constraints) {
410       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
411       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
412       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
413     }
414 
415     /* check */
416     if (pcbddc->dbg_flag) {
417       /* assemble subdomain vector on local nodes */
418       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
419       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
420       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
421       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
422       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
423       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
424       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
425       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
426       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
427       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
428       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
429       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
430       if (n_constraints) {
431         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
432         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
433         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
434       }
435       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
436       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
438       /* check saddle point solution */
439       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
440       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
442       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
443       /* shift by the identity matrix */
444       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
445       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
446       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
448     }
449   }
450 
451   /* constraints */
452   for (i=0;i<n_constraints;i++) {
453     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
454     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
455     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
456     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
457     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
458     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
459     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
460     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
461     if (n_vertices) {
462       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
463     }
464     /* Set values in coarse basis function and subdomain part of coarse_mat */
465     /* coarse basis functions */
466     j = i+n_vertices; /* don't touch this! */
467     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
468     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
469     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
471     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
472     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
473     if (pcbddc->switch_static || pcbddc->dbg_flag) {
474       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
475       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
477       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
478       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
479     }
480     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
481     if (n_vertices) {
482       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
483       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
484       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
485     }
486     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
487     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
488     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
489 
490     if (pcbddc->dbg_flag) {
491       /* assemble subdomain vector on nodes */
492       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
493       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
494       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
495       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
496       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
497       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
498       /* assemble subdomain vector of lagrange multipliers */
499       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
500       if (n_vertices) {
501         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
502         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
503         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
504       }
505       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
506       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
507       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
508       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
509       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
510       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
511       /* check saddle point solution */
512       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
513       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
514       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
515       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
516       /* shift by the identity matrix */
517       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
518       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
519       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
521     }
522   }
523   /* call assembling routines for local coarse basis */
524   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
525   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   if (pcbddc->switch_static || pcbddc->dbg_flag) {
527     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529   }
530 
531   /* compute other basis functions for non-symmetric problems */
532   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
533   if (!setsym || (setsym && !issym)) {
534     if (!pcbddc->coarse_psi_B) {
535       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
536       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
537       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
538       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
539     }
540     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
541       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
542       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
543       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
544       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
545     }
546     for (i=0;i<pcbddc->local_primal_size;i++) {
547       if (n_constraints) {
548         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
549         for (j=0;j<n_constraints;j++) {
550           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
551         }
552         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
553         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
554       }
555       if (i<n_vertices) {
556         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
557         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
558         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
559         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
560         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
561         if (n_constraints) {
562           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
563         }
564       } else {
565         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
566       }
567       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
568       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
569       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
570       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
572       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
573       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
574       if (i<n_vertices) {
575         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
576       }
577       if (pcbddc->switch_static || pcbddc->dbg_flag) {
578         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
579         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
581         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
582         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
583       }
584 
585       if (pcbddc->dbg_flag) {
586         /* assemble subdomain vector on nodes */
587         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
588         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
589         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
590         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
591         if (i<n_vertices) {
592           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
593         }
594         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
595         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
596         /* assemble subdomain vector of lagrange multipliers */
597         for (j=0;j<pcbddc->local_primal_size;j++) {
598           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
599         }
600         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
601         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
602         /* check saddle point solution */
603         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
604         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
605         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
606         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
607         /* shift by the identity matrix */
608         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
609         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
610         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
611         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
612       }
613     }
614     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
615     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616     if (pcbddc->switch_static || pcbddc->dbg_flag) {
617       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
618       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619     }
620   }
621   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
622   /* Checking coarse_sub_mat and coarse basis functios */
623   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
624   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
625   if (pcbddc->dbg_flag) {
626     Mat         coarse_sub_mat;
627     Mat         AUXMAT,TM1,TM2,TM3,TM4;
628     Mat         coarse_phi_D,coarse_phi_B;
629     Mat         coarse_psi_D,coarse_psi_B;
630     Mat         A_II,A_BB,A_IB,A_BI;
631     MatType     checkmattype=MATSEQAIJ;
632     PetscReal   real_value;
633 
634     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
635     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
636     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
638     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
639     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
640     if (pcbddc->coarse_psi_B) {
641       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
642       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
643     }
644     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
645 
646     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
647     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
648     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
649     if (pcbddc->coarse_psi_B) {
650       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
651       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
652       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
653       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
654       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
655       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
656       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
657       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
658       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
659       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
660       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
661       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
662     } else {
663       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
664       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
665       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
666       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
667       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
668       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
669       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
670       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
671     }
672     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
673     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
674     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
676     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
677     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
678     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
679     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
683     for (i=0;i<pcbddc->local_primal_size;i++) {
684       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
685     }
686     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
687     for (i=0;i<pcbddc->local_primal_size;i++) {
688       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
689     }
690     if (pcbddc->coarse_psi_B) {
691       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
692       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
693         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
694       }
695       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
696       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
697         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
698       }
699     }
700     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
701     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
702     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
705     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
706     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
709     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
710     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
711     if (pcbddc->coarse_psi_B) {
712       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
713       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
714     }
715     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
716     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
717     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
718     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
719   }
720   /* free memory */
721   if (n_vertices) {
722     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
723     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
724     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
725     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
727   }
728   if (n_constraints) {
729     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
730     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
731     ierr = MatDestroy(&M1);CHKERRQ(ierr);
732     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
733   }
734   ierr = PetscFree(auxindices);CHKERRQ(ierr);
735   /* get back data */
736   *coarse_submat_vals_n = coarse_submat_vals;
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
742 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
743 {
744   PC_IS*            pcis = (PC_IS*)(pc->data);
745   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
746   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
747   PetscBool         issbaij,isbaij;
748   /* manage repeated solves */
749   MatReuse          reuse;
750   MatStructure      matstruct;
751   PetscErrorCode    ierr;
752 
753   PetscFunctionBegin;
754   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
755     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
756   }
757   /* get mat flags */
758   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
759   reuse = MAT_INITIAL_MATRIX;
760   if (pc->setupcalled) {
761     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
762     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
763     if (matstruct == SAME_NONZERO_PATTERN) {
764       reuse = MAT_REUSE_MATRIX;
765     } else {
766       reuse = MAT_INITIAL_MATRIX;
767     }
768   }
769   if (reuse == MAT_INITIAL_MATRIX) {
770     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
771     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
772     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
773     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
774     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
775   }
776 
777   /* transform local matrices if needed */
778   if (pcbddc->use_change_of_basis) {
779     Mat         change_mat_all;
780     PetscScalar *row_cmat_values;
781     PetscInt    *row_cmat_indices;
782     PetscInt    *nnz,*is_indices,*temp_indices;
783     PetscInt    i,j,k,n_D,n_B;
784 
785     /* Get Non-overlapping dimensions */
786     n_B = pcis->n_B;
787     n_D = pcis->n-n_B;
788 
789     /* compute nonzero structure of change of basis on all local nodes */
790     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
791     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
792     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
793     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
794     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
795     k=1;
796     for (i=0;i<n_B;i++) {
797       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
798       nnz[is_indices[i]]=j;
799       if (k < j) k = j;
800       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
801     }
802     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
803     /* assemble change of basis matrix on the whole set of local dofs */
804     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
805     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
806     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
807     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
808     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
809     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
810     for (i=0;i<n_D;i++) {
811       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
812     }
813     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
814     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     for (i=0;i<n_B;i++) {
816       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
817       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
818       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
819       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
824     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
825     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
826     if (!issbaij && !isbaij) {
827       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
828     } else {
829       Mat work_mat;
830       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
831       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
832       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
833     }
834     /*
835     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
836     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
837     */
838     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
839     ierr = PetscFree(nnz);CHKERRQ(ierr);
840     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
841   } else {
842     /* without change of basis, the local matrix is unchanged */
843     if (!pcbddc->local_mat) {
844       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
845       pcbddc->local_mat = matis->A;
846     }
847   }
848 
849   /* get submatrices */
850   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
851   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
852   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
853   if (!issbaij) {
854     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
855     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
856   } else {
857     Mat newmat;
858     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
859     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
861     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
862   }
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
868 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
869 {
870   PC_IS*         pcis = (PC_IS*)(pc->data);
871   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
872   IS             is_aux1,is_aux2;
873   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
874   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
875   PetscInt       vbs,bs;
876   PetscBT        bitmask;
877   PetscErrorCode ierr;
878 
879   PetscFunctionBegin;
880   /* No need to setup local scatters if primal space is unchanged */
881   if (!pcbddc->new_primal_space_local) {
882     PetscFunctionReturn(0);
883   }
884   /* destroy old objects */
885   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
886   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
887   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
888   /* Set Non-overlapping dimensions */
889   n_B = pcis->n_B; n_D = pcis->n - n_B;
890   n_vertices = pcbddc->n_actual_vertices;
891   /* create auxiliary bitmask */
892   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
893   for (i=0;i<n_vertices;i++) {
894     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
895   }
896 
897   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
898   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
899   for (i=0, n_R=0; i<pcis->n; i++) {
900     if (!PetscBTLookup(bitmask,i)) {
901       idx_R_local[n_R] = i;
902       n_R++;
903     }
904   }
905 
906   /* Block code */
907   vbs = 1;
908   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
909   if (bs>1 && !(n_vertices%bs)) {
910     PetscBool is_blocked = PETSC_TRUE;
911     PetscInt  *vary;
912     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
913     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
914     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
915     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
916     for (i=0; i<n_vertices; i++) {
917       if (vary[i]!=0 && vary[i]!=bs) {
918         is_blocked = PETSC_FALSE;
919         break;
920       }
921     }
922     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
923       vbs = bs;
924       for (i=0;i<n_R/vbs;i++) {
925         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
926       }
927     }
928     ierr = PetscFree(vary);CHKERRQ(ierr);
929   }
930   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
931   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
932 
933   /* print some info if requested */
934   if (pcbddc->dbg_flag) {
935     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
936     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
937     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
938     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
939     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
940     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
941     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
942     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
943   }
944 
945   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
946   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
947   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
948   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
949   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
950   for (i=0; i<n_D; i++) {
951     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
952   }
953   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
954   for (i=0, j=0; i<n_R; i++) {
955     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
956       aux_array1[j++] = i;
957     }
958   }
959   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
960   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
961   for (i=0, j=0; i<n_B; i++) {
962     if (!PetscBTLookup(bitmask,is_indices[i])) {
963       aux_array2[j++] = i;
964     }
965   }
966   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
967   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
968   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
969   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
970   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
971 
972   if (pcbddc->switch_static || pcbddc->dbg_flag) {
973     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
974     for (i=0, j=0; i<n_R; i++) {
975       if (PetscBTLookup(bitmask,idx_R_local[i])) {
976         aux_array1[j++] = i;
977       }
978     }
979     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
980     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
981     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
982   }
983   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
984   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
991 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
992 {
993   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
994   PC_IS          *pcis = (PC_IS*)pc->data;
995   PC             pc_temp;
996   Mat            A_RR;
997   MatStructure   matstruct;
998   MatReuse       reuse;
999   PetscScalar    m_one = -1.0;
1000   PetscReal      value;
1001   PetscInt       n_D,n_R,ibs,mbs;
1002   PetscBool      use_exact,use_exact_reduced,issbaij;
1003   PetscErrorCode ierr;
1004   /* prefixes stuff */
1005   char           dir_prefix[256],neu_prefix[256],str_level[3];
1006   size_t         len;
1007 
1008   PetscFunctionBegin;
1009   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1010 
1011   /* compute prefixes */
1012   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1013   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1014   if (!pcbddc->current_level) {
1015     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1016     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1017     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1018     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1019   } else {
1020     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1021     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1022     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1023     len -= 15; /* remove "pc_bddc_coarse_" */
1024     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1025     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1026     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1027     *(dir_prefix+len)='\0';
1028     *(neu_prefix+len)='\0';
1029     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1030     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1031     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1032     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1033   }
1034 
1035   /* DIRICHLET PROBLEM */
1036   /* Matrix for Dirichlet problem is pcis->A_II */
1037   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1038   if (!pcbddc->ksp_D) { /* create object if not yet build */
1039     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1040     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1041     /* default */
1042     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1043     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1044     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1045     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1046     if (issbaij) {
1047       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1048     } else {
1049       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1050     }
1051     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1052   }
1053   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1054   /* Allow user's customization */
1055   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1056   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1057   if (!n_D) {
1058     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1059     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1060   }
1061   /* Set Up KSP for Dirichlet problem of BDDC */
1062   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1063   /* set ksp_D into pcis data */
1064   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1065   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1066   pcis->ksp_D = pcbddc->ksp_D;
1067 
1068   /* NEUMANN PROBLEM */
1069   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1070   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1071   if (pcbddc->ksp_R) { /* already created ksp */
1072     PetscInt nn_R;
1073     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1074     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1075     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1076     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1077       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1078       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1079       reuse = MAT_INITIAL_MATRIX;
1080     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1081       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1082         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1083         reuse = MAT_INITIAL_MATRIX;
1084       } else { /* safe to reuse the matrix */
1085         reuse = MAT_REUSE_MATRIX;
1086       }
1087     }
1088     /* last check */
1089     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1090       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091       reuse = MAT_INITIAL_MATRIX;
1092     }
1093   } else { /* first time, so we need to create the matrix */
1094     reuse = MAT_INITIAL_MATRIX;
1095   }
1096   /* extract A_RR */
1097   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1098   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1099   if (ibs != mbs) {
1100     Mat newmat;
1101     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1102     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1103     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1104   } else {
1105     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1106   }
1107   if (!pcbddc->ksp_R) { /* create object if not present */
1108     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1109     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1110     /* default */
1111     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1112     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1113     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1114     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1115     if (issbaij) {
1116       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1117     } else {
1118       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1119     }
1120     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1121   }
1122   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1123   /* Allow user's customization */
1124   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1125   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1126   if (!n_R) {
1127     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1128     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1129   }
1130   /* Set Up KSP for Neumann problem of BDDC */
1131   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1132 
1133   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1134   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1135     /* Dirichlet */
1136     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1137     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1138     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1139     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1140     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1141     /* need to be adapted? */
1142     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1143     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1144     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1145     /* print info */
1146     if (pcbddc->dbg_flag) {
1147       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1148       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1149       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1150       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1151       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1152       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1153     }
1154     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1155       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1156     }
1157 
1158     /* Neumann */
1159     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1160     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1161     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1162     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1163     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1164     /* need to be adapted? */
1165     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1166     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1167     /* print info */
1168     if (pcbddc->dbg_flag) {
1169       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1170       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1171     }
1172     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1173       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1174     }
1175   }
1176   /* free Neumann problem's matrix */
1177   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1183 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1184 {
1185   PetscErrorCode ierr;
1186   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1187 
1188   PetscFunctionBegin;
1189   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1190   if (pcbddc->local_auxmat1) {
1191     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1192     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1193   }
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1199 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1200 {
1201   PetscErrorCode ierr;
1202   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1203   PC_IS*            pcis = (PC_IS*)  (pc->data);
1204   const PetscScalar zero = 0.0;
1205 
1206   PetscFunctionBegin;
1207   /* Application of PHI^T (or PSI^T)  */
1208   if (pcbddc->coarse_psi_B) {
1209     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1210     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1211   } else {
1212     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1213     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1214   }
1215   /* Scatter data of coarse_rhs */
1216   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1217   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1218 
1219   /* Local solution on R nodes */
1220   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1221   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1222   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223   if (pcbddc->switch_static) {
1224     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226   }
1227   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1228   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1229   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231   if (pcbddc->switch_static) {
1232     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1233     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234   }
1235 
1236   /* Coarse solution */
1237   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1239     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1240   }
1241   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1242   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1243 
1244   /* Sum contributions from two levels */
1245   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1246   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1252 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1253 {
1254   PetscErrorCode ierr;
1255   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1256 
1257   PetscFunctionBegin;
1258   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1264 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1265 {
1266   PetscErrorCode ierr;
1267   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1268 
1269   PetscFunctionBegin;
1270   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 /* uncomment for testing purposes */
1275 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1276 #undef __FUNCT__
1277 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1278 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1279 {
1280   PetscErrorCode    ierr;
1281   PC_IS*            pcis = (PC_IS*)(pc->data);
1282   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1283   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1284   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1285   MatType           impMatType=MATSEQAIJ;
1286   /* one and zero */
1287   PetscScalar       one=1.0,zero=0.0;
1288   /* space to store constraints and their local indices */
1289   PetscScalar       *temp_quadrature_constraint;
1290   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1291   /* iterators */
1292   PetscInt          i,j,k,total_counts,temp_start_ptr;
1293   /* stuff to store connected components stored in pcbddc->mat_graph */
1294   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1295   PetscInt          n_ISForFaces,n_ISForEdges;
1296   /* near null space stuff */
1297   MatNullSpace      nearnullsp;
1298   const Vec         *nearnullvecs;
1299   Vec               *localnearnullsp;
1300   PetscBool         nnsp_has_cnst;
1301   PetscInt          nnsp_size;
1302   PetscScalar       *array;
1303   /* BLAS integers */
1304   PetscBLASInt      lwork,lierr;
1305   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1306   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1307   /* LAPACK working arrays for SVD or POD */
1308   PetscBool         skip_lapack;
1309   PetscScalar       *work;
1310   PetscReal         *singular_vals;
1311 #if defined(PETSC_USE_COMPLEX)
1312   PetscReal         *rwork;
1313 #endif
1314 #if defined(PETSC_MISSING_LAPACK_GESVD)
1315   PetscBLASInt      Blas_one_2=1;
1316   PetscScalar       *temp_basis,*correlation_mat;
1317 #else
1318   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1319   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1320 #endif
1321   /* reuse */
1322   PetscInt          olocal_primal_size;
1323   PetscInt          *oprimal_indices_local_idxs;
1324   /* change of basis */
1325   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1326   PetscBool         boolforchange,qr_needed;
1327   PetscBT           touched,change_basis,qr_needed_idx;
1328   /* auxiliary stuff */
1329   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1330   /* some quantities */
1331   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1332   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1333 
1334 
1335   PetscFunctionBegin;
1336   /* Destroy Mat objects computed previously */
1337   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1338   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1339   /* Get index sets for faces, edges and vertices from graph */
1340   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1341     pcbddc->use_vertices = PETSC_TRUE;
1342   }
1343   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1344   /* HACK: provide functions to set change of basis */
1345   if (!ISForVertices && pcbddc->NullSpace) {
1346     pcbddc->use_change_of_basis = PETSC_TRUE;
1347     pcbddc->use_change_on_faces = PETSC_FALSE;
1348   }
1349   /* print some info */
1350   if (pcbddc->dbg_flag) {
1351     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1352     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1353     i = 0;
1354     if (ISForVertices) {
1355       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1356     }
1357     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1358     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1359     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1360     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1361   }
1362   /* check if near null space is attached to global mat */
1363   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1364   if (nearnullsp) {
1365     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1366     /* remove any stored info */
1367     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1368     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1369     /* store information for BDDC solver reuse */
1370     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1371     pcbddc->onearnullspace = nearnullsp;
1372     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1373     for (i=0;i<nnsp_size;i++) {
1374       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1375     }
1376   } else { /* if near null space is not provided BDDC uses constants by default */
1377     nnsp_size = 0;
1378     nnsp_has_cnst = PETSC_TRUE;
1379   }
1380   /* get max number of constraints on a single cc */
1381   max_constraints = nnsp_size;
1382   if (nnsp_has_cnst) max_constraints++;
1383 
1384   /*
1385        Evaluate maximum storage size needed by the procedure
1386        - temp_indices will contain start index of each constraint stored as follows
1387        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1388        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1389        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1390                                                                                                                                                          */
1391   total_counts = n_ISForFaces+n_ISForEdges;
1392   total_counts *= max_constraints;
1393   n_vertices = 0;
1394   if (ISForVertices) {
1395     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1396   }
1397   total_counts += n_vertices;
1398   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1399   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1400   total_counts = 0;
1401   max_size_of_constraint = 0;
1402   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1403     if (i<n_ISForEdges) {
1404       used_IS = &ISForEdges[i];
1405     } else {
1406       used_IS = &ISForFaces[i-n_ISForEdges];
1407     }
1408     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1409     total_counts += j;
1410     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1411   }
1412   total_counts *= max_constraints;
1413   total_counts += n_vertices;
1414   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1415   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1416   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1417   /* get local part of global near null space vectors */
1418   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1419   for (k=0;k<nnsp_size;k++) {
1420     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1421     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1422     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423   }
1424 
1425   /* whether or not to skip lapack calls */
1426   skip_lapack = PETSC_TRUE;
1427   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1428 
1429   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1430   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1431     PetscScalar temp_work;
1432 #if defined(PETSC_MISSING_LAPACK_GESVD)
1433     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1434     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1435     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1436     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1437 #if defined(PETSC_USE_COMPLEX)
1438     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1439 #endif
1440     /* now we evaluate the optimal workspace using query with lwork=-1 */
1441     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1442     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1443     lwork = -1;
1444     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1445 #if !defined(PETSC_USE_COMPLEX)
1446     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1447 #else
1448     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1449 #endif
1450     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1451     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1452 #else /* on missing GESVD */
1453     /* SVD */
1454     PetscInt max_n,min_n;
1455     max_n = max_size_of_constraint;
1456     min_n = max_constraints;
1457     if (max_size_of_constraint < max_constraints) {
1458       min_n = max_size_of_constraint;
1459       max_n = max_constraints;
1460     }
1461     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1462 #if defined(PETSC_USE_COMPLEX)
1463     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1464 #endif
1465     /* now we evaluate the optimal workspace using query with lwork=-1 */
1466     lwork = -1;
1467     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1468     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1469     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1470     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1471 #if !defined(PETSC_USE_COMPLEX)
1472     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1473 #else
1474     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1475 #endif
1476     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1477     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1478 #endif /* on missing GESVD */
1479     /* Allocate optimal workspace */
1480     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1481     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1482   }
1483   /* Now we can loop on constraining sets */
1484   total_counts = 0;
1485   temp_indices[0] = 0;
1486   /* vertices */
1487   if (ISForVertices) {
1488     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1489     if (nnsp_has_cnst) { /* consider all vertices */
1490       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1491       for (i=0;i<n_vertices;i++) {
1492         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1493         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1494         total_counts++;
1495       }
1496     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1497       PetscBool used_vertex;
1498       for (i=0;i<n_vertices;i++) {
1499         used_vertex = PETSC_FALSE;
1500         k = 0;
1501         while (!used_vertex && k<nnsp_size) {
1502           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1503           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1504             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1505             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1506             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1507             total_counts++;
1508             used_vertex = PETSC_TRUE;
1509           }
1510           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1511           k++;
1512         }
1513       }
1514     }
1515     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1516     n_vertices = total_counts;
1517   }
1518 
1519   /* edges and faces */
1520   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1521     if (i<n_ISForEdges) {
1522       used_IS = &ISForEdges[i];
1523       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1524     } else {
1525       used_IS = &ISForFaces[i-n_ISForEdges];
1526       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1527     }
1528     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1529     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1530     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1531     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1532     /* change of basis should not be performed on local periodic nodes */
1533     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1534     if (nnsp_has_cnst) {
1535       PetscScalar quad_value;
1536       temp_constraints++;
1537       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1538       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1539       for (j=0;j<size_of_constraint;j++) {
1540         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1541       }
1542       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1543       total_counts++;
1544     }
1545     for (k=0;k<nnsp_size;k++) {
1546       PetscReal real_value;
1547       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1548       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1549       for (j=0;j<size_of_constraint;j++) {
1550         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1551       }
1552       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1553       /* check if array is null on the connected component */
1554       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1555       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1556       if (real_value > 0.0) { /* keep indices and values */
1557         temp_constraints++;
1558         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1559         total_counts++;
1560       }
1561     }
1562     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1563     valid_constraints = temp_constraints;
1564     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1565     if (!pcbddc->use_nnsp_true && temp_constraints) {
1566       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1567 
1568 #if defined(PETSC_MISSING_LAPACK_GESVD)
1569       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1570          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1571          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1572             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1573             from that computed using LAPACKgesvd
1574          -> This is due to a different computation of eigenvectors in LAPACKheev
1575          -> The quality of the POD-computed basis will be the same */
1576       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1577       /* Store upper triangular part of correlation matrix */
1578       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1579       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1580       for (j=0;j<temp_constraints;j++) {
1581         for (k=0;k<j+1;k++) {
1582           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1583         }
1584       }
1585       /* compute eigenvalues and eigenvectors of correlation matrix */
1586       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1587       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1588 #if !defined(PETSC_USE_COMPLEX)
1589       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1590 #else
1591       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1592 #endif
1593       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1594       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1595       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1596       j=0;
1597       while (j < temp_constraints && singular_vals[j] < tol) j++;
1598       total_counts=total_counts-j;
1599       valid_constraints = temp_constraints-j;
1600       /* scale and copy POD basis into used quadrature memory */
1601       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1602       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1603       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1604       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1605       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1606       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1607       if (j<temp_constraints) {
1608         PetscInt ii;
1609         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1610         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1611         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1612         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1613         for (k=0;k<temp_constraints-j;k++) {
1614           for (ii=0;ii<size_of_constraint;ii++) {
1615             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1616           }
1617         }
1618       }
1619 #else  /* on missing GESVD */
1620       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1621       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1622       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1623       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1624 #if !defined(PETSC_USE_COMPLEX)
1625       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1626 #else
1627       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1628 #endif
1629       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1630       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1631       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1632       k = temp_constraints;
1633       if (k > size_of_constraint) k = size_of_constraint;
1634       j = 0;
1635       while (j < k && singular_vals[k-j-1] < tol) j++;
1636       total_counts = total_counts-temp_constraints+k-j;
1637       valid_constraints = k-j;
1638 #endif /* on missing GESVD */
1639     }
1640     /* setting change_of_basis flag is safe now */
1641     if (boolforchange) {
1642       for (j=0;j<valid_constraints;j++) {
1643         PetscBTSet(change_basis,total_counts-j-1);
1644       }
1645     }
1646   }
1647   /* free index sets of faces, edges and vertices */
1648   for (i=0;i<n_ISForFaces;i++) {
1649     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1650   }
1651   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1652   for (i=0;i<n_ISForEdges;i++) {
1653     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1654   }
1655   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1656   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1657   /* map temp_indices_to_constraint in boundary numbering */
1658   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1659   if (i != temp_indices[total_counts]) {
1660     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1661   }
1662 
1663   /* free workspace */
1664   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1665     ierr = PetscFree(work);CHKERRQ(ierr);
1666 #if defined(PETSC_USE_COMPLEX)
1667     ierr = PetscFree(rwork);CHKERRQ(ierr);
1668 #endif
1669     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1670 #if defined(PETSC_MISSING_LAPACK_GESVD)
1671     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1672     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1673 #endif
1674   }
1675   for (k=0;k<nnsp_size;k++) {
1676     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1677   }
1678   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1679 
1680   /* set quantities in pcbddc data structure and store previous primal size */
1681   /* n_vertices defines the number of subdomain corners in the primal space */
1682   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1683   olocal_primal_size = pcbddc->local_primal_size;
1684   pcbddc->local_primal_size = total_counts;
1685   pcbddc->n_vertices = n_vertices;
1686   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1687 
1688   /* Create constraint matrix */
1689   /* The constraint matrix is used to compute the l2g map of primal dofs */
1690   /* so we need to set it up properly either with or without change of basis */
1691   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1692   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1693   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1694   /* array to compute a local numbering of constraints : vertices first then constraints */
1695   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1696   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1697   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1698   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1699   /* auxiliary stuff for basis change */
1700   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1701   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1702 
1703   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1704   total_primal_vertices=0;
1705   for (i=0;i<pcbddc->local_primal_size;i++) {
1706     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1707     if (size_of_constraint == 1) {
1708       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1709       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1710       aux_primal_minloc[total_primal_vertices]=0;
1711       total_primal_vertices++;
1712     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1713       PetscInt min_loc,min_index;
1714       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1715       /* find first untouched local node */
1716       k = 0;
1717       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1718       min_index = global_indices[k];
1719       min_loc = k;
1720       /* search the minimum among global nodes already untouched on the cc */
1721       for (k=1;k<size_of_constraint;k++) {
1722         /* there can be more than one constraint on a single connected component */
1723         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1724           min_index = global_indices[k];
1725           min_loc = k;
1726         }
1727       }
1728       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1729       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1730       aux_primal_minloc[total_primal_vertices]=min_loc;
1731       total_primal_vertices++;
1732     }
1733   }
1734   /* determine if a QR strategy is needed for change of basis */
1735   qr_needed = PETSC_FALSE;
1736   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1737   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1738     if (PetscBTLookup(change_basis,i)) {
1739       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1740       j = 0;
1741       for (k=0;k<size_of_constraint;k++) {
1742         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1743           j++;
1744         }
1745       }
1746       /* found more than one primal dof on the cc */
1747       if (j > 1) {
1748         PetscBTSet(qr_needed_idx,i);
1749         qr_needed = PETSC_TRUE;
1750       }
1751     }
1752   }
1753   /* free workspace */
1754   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1755 
1756   /* permute indices in order to have a sorted set of vertices */
1757   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1758 
1759   /* nonzero structure of constraint matrix */
1760   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1761   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1762   j=total_primal_vertices;
1763   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1764     if (!PetscBTLookup(change_basis,i)) {
1765       nnz[j]=temp_indices[i+1]-temp_indices[i];
1766       j++;
1767     }
1768   }
1769   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1770   ierr = PetscFree(nnz);CHKERRQ(ierr);
1771   /* set values in constraint matrix */
1772   for (i=0;i<total_primal_vertices;i++) {
1773     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1774   }
1775   total_counts = total_primal_vertices;
1776   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1777     if (!PetscBTLookup(change_basis,i)) {
1778       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1779       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1780       total_counts++;
1781     }
1782   }
1783   /* assembling */
1784   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1785   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1786   /*
1787   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1788   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1789   */
1790   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1791   if (pcbddc->use_change_of_basis) {
1792     /* dual and primal dofs on a single cc */
1793     PetscInt     dual_dofs,primal_dofs;
1794     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1795     PetscInt     primal_counter;
1796     /* working stuff for GEQRF */
1797     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1798     PetscBLASInt lqr_work;
1799     /* working stuff for UNGQR */
1800     PetscScalar  *gqr_work,lgqr_work_t;
1801     PetscBLASInt lgqr_work;
1802     /* working stuff for TRTRS */
1803     PetscScalar  *trs_rhs;
1804     PetscBLASInt Blas_NRHS;
1805     /* pointers for values insertion into change of basis matrix */
1806     PetscInt     *start_rows,*start_cols;
1807     PetscScalar  *start_vals;
1808     /* working stuff for values insertion */
1809     PetscBT      is_primal;
1810 
1811     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1812     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1813     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1814     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1815     /* work arrays */
1816     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1817     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1818     /* nonzeros per row */
1819     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1820       if (PetscBTLookup(change_basis,i)) {
1821         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1822         if (PetscBTLookup(qr_needed_idx,i)) {
1823           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1824         } else {
1825           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1826           /* get local primal index on the cc */
1827           j = 0;
1828           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1829           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1830         }
1831       }
1832     }
1833     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1834     ierr = PetscFree(nnz);CHKERRQ(ierr);
1835     /* Set initial identity in the matrix */
1836     for (i=0;i<pcis->n_B;i++) {
1837       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1838     }
1839 
1840     if (pcbddc->dbg_flag) {
1841       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1842       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1843     }
1844 
1845 
1846     /* Now we loop on the constraints which need a change of basis */
1847     /*
1848        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1849        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1850 
1851        Basic blocks of change of basis matrix T computed
1852 
1853           - Using the following block transformation if there is only a primal dof on the cc
1854             (in the example, primal dof is the last one of the edge in LOCAL ordering
1855              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1856             | 1        0   ...        0     1 |
1857             | 0        1   ...        0     1 |
1858             |              ...                |
1859             | 0        ...            1     1 |
1860             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1861 
1862           - via QR decomposition of constraints otherwise
1863     */
1864     if (qr_needed) {
1865       /* space to store Q */
1866       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1867       /* first we issue queries for optimal work */
1868       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1869       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1870       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1871       lqr_work = -1;
1872       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1873       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1874       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1875       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1876       lgqr_work = -1;
1877       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1878       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1879       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1880       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1881       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1882       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1883       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1884       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1885       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1886       /* array to store scaling factors for reflectors */
1887       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1888       /* array to store rhs and solution of triangular solver */
1889       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1890       /* allocating workspace for check */
1891       if (pcbddc->dbg_flag) {
1892         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1893       }
1894     }
1895     /* array to store whether a node is primal or not */
1896     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1897     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1898     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1899     if (i != total_primal_vertices) {
1900       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1901     }
1902     for (i=0;i<total_primal_vertices;i++) {
1903       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1904     }
1905     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1906 
1907     /* loop on constraints and see whether or not they need a change of basis and compute it */
1908     /* -> using implicit ordering contained in temp_indices data */
1909     total_counts = pcbddc->n_vertices;
1910     primal_counter = total_counts;
1911     while (total_counts<pcbddc->local_primal_size) {
1912       primal_dofs = 1;
1913       if (PetscBTLookup(change_basis,total_counts)) {
1914         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1915         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1916           primal_dofs++;
1917         }
1918         /* get constraint info */
1919         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1920         dual_dofs = size_of_constraint-primal_dofs;
1921 
1922         if (pcbddc->dbg_flag) {
1923           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1924         }
1925 
1926         if (primal_dofs > 1) { /* QR */
1927 
1928           /* copy quadrature constraints for change of basis check */
1929           if (pcbddc->dbg_flag) {
1930             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1931           }
1932           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1933           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1934 
1935           /* compute QR decomposition of constraints */
1936           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1937           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1938           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1939           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1940           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1941           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1942           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1943 
1944           /* explictly compute R^-T */
1945           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1946           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1947           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1948           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1949           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1950           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1951           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1952           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1953           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1954           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1955 
1956           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1957           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1958           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1959           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1960           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1961           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1962           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1963           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1964           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1965 
1966           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1967              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1968              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1969           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1970           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1971           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1975           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1976           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1977           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1978           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1979 
1980           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1981           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1982           /* insert cols for primal dofs */
1983           for (j=0;j<primal_dofs;j++) {
1984             start_vals = &qr_basis[j*size_of_constraint];
1985             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1986             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1987           }
1988           /* insert cols for dual dofs */
1989           for (j=0,k=0;j<dual_dofs;k++) {
1990             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1991               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1992               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1993               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1994               j++;
1995             }
1996           }
1997 
1998           /* check change of basis */
1999           if (pcbddc->dbg_flag) {
2000             PetscInt   ii,jj;
2001             PetscBool valid_qr=PETSC_TRUE;
2002             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2003             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2004             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2005             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2006             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2007             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2008             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2009             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2010             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2011             for (jj=0;jj<size_of_constraint;jj++) {
2012               for (ii=0;ii<primal_dofs;ii++) {
2013                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2014                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2015               }
2016             }
2017             if (!valid_qr) {
2018               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2019               for (jj=0;jj<size_of_constraint;jj++) {
2020                 for (ii=0;ii<primal_dofs;ii++) {
2021                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2022                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2023                   }
2024                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2025                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2026                   }
2027                 }
2028               }
2029             } else {
2030               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2031             }
2032           }
2033         } else { /* simple transformation block */
2034           PetscInt row,col;
2035           PetscScalar val;
2036           for (j=0;j<size_of_constraint;j++) {
2037             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2038             if (!PetscBTLookup(is_primal,row)) {
2039               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2040               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2041               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2042             } else {
2043               for (k=0;k<size_of_constraint;k++) {
2044                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2045                 if (row != col) {
2046                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2047                 } else {
2048                   val = 1.0;
2049                 }
2050                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2051               }
2052             }
2053           }
2054         }
2055         /* increment primal counter */
2056         primal_counter += primal_dofs;
2057       } else {
2058         if (pcbddc->dbg_flag) {
2059           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2060         }
2061       }
2062       /* increment constraint counter total_counts */
2063       total_counts += primal_dofs;
2064     }
2065 
2066     /* free workspace */
2067     if (qr_needed) {
2068       if (pcbddc->dbg_flag) {
2069         ierr = PetscFree(work);CHKERRQ(ierr);
2070       }
2071       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2072       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2073       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2074       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2075       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2076     }
2077     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2078     /* assembling */
2079     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081     /*
2082     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2083     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2084     */
2085   }
2086 
2087   /* get indices in local ordering for vertices and constraints */
2088   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2089     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2090     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2091   }
2092   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2093   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2094   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2095   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2096   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2097   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2098   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2099   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2100   /* set quantities in PCBDDC data struct */
2101   pcbddc->n_actual_vertices = i;
2102   /* check if a new primal space has been introduced */
2103   pcbddc->new_primal_space_local = PETSC_TRUE;
2104   if (olocal_primal_size == pcbddc->local_primal_size) {
2105     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2106     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2107     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2108   }
2109   /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */
2110   if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE;
2111   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2112   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2113 
2114   /* flush dbg viewer */
2115   if (pcbddc->dbg_flag) {
2116     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2117   }
2118 
2119   /* free workspace */
2120   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2121   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2122   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2123   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2124   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2125   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2126   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2127   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2133 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2134 {
2135   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2136   PC_IS       *pcis = (PC_IS*)pc->data;
2137   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2138   PetscInt    ierr,i,vertex_size;
2139   PetscViewer viewer=pcbddc->dbg_viewer;
2140 
2141   PetscFunctionBegin;
2142   /* Reset previously computed graph */
2143   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2144   /* Init local Graph struct */
2145   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2146 
2147   /* Check validity of the csr graph passed in by the user */
2148   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2149     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2150   }
2151 
2152   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2153   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2154     Mat mat_adj;
2155     const PetscInt *xadj,*adjncy;
2156     PetscBool flg_row=PETSC_TRUE;
2157 
2158     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2159     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2160     if (!flg_row) {
2161       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2162     }
2163     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2164     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2165     if (!flg_row) {
2166       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2167     }
2168     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2169   }
2170 
2171   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2172   vertex_size = 1;
2173   if (pcbddc->user_provided_isfordofs) {
2174     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2175       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2176       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2177         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2178         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2179       }
2180       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2181       pcbddc->n_ISForDofs = 0;
2182       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2183     }
2184     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2185     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2186   } else {
2187     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2188       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2189       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2190       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2191         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2192       }
2193     }
2194   }
2195 
2196   /* Setup of Graph */
2197   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2198     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2199   }
2200   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2201     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2202   }
2203   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2204 
2205   /* Graph's connected components analysis */
2206   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2207 
2208   /* print some info to stdout */
2209   if (pcbddc->dbg_flag) {
2210     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2211   }
2212 
2213   /* mark topography has done */
2214   pcbddc->recompute_topography = PETSC_FALSE;
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 #undef __FUNCT__
2219 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2220 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2221 {
2222   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2223   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2224   PetscErrorCode ierr;
2225 
2226   PetscFunctionBegin;
2227   n = 0;
2228   vertices = 0;
2229   if (pcbddc->ConstraintMatrix) {
2230     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2231     for (i=0;i<local_primal_size;i++) {
2232       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2233       if (size_of_constraint == 1) n++;
2234       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2235     }
2236     if (vertices_idx) {
2237       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2238       n = 0;
2239       for (i=0;i<local_primal_size;i++) {
2240         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2241         if (size_of_constraint == 1) {
2242           vertices[n++]=row_cmat_indices[0];
2243         }
2244         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2245       }
2246     }
2247   }
2248   *n_vertices = n;
2249   if (vertices_idx) *vertices_idx = vertices;
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2255 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2256 {
2257   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2258   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2259   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2260   PetscBT        touched;
2261   PetscErrorCode ierr;
2262 
2263     /* This function assumes that the number of local constraints per connected component
2264        is not greater than the number of nodes defined for the connected component
2265        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2266   PetscFunctionBegin;
2267   n = 0;
2268   constraints_index = 0;
2269   if (pcbddc->ConstraintMatrix) {
2270     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2271     max_size_of_constraint = 0;
2272     for (i=0;i<local_primal_size;i++) {
2273       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2274       if (size_of_constraint > 1) {
2275         n++;
2276       }
2277       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2278       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2279     }
2280     if (constraints_idx) {
2281       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2282       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2283       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2284       n = 0;
2285       for (i=0;i<local_primal_size;i++) {
2286         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2287         if (size_of_constraint > 1) {
2288           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2289           /* find first untouched local node */
2290           j = 0;
2291           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2292           min_index = row_cmat_global_indices[j];
2293           min_loc = j;
2294           /* search the minimum among nodes not yet touched on the connected component
2295              since there can be more than one constraint on a single cc */
2296           for (j=1;j<size_of_constraint;j++) {
2297             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2298               min_index = row_cmat_global_indices[j];
2299               min_loc = j;
2300             }
2301           }
2302           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2303           constraints_index[n++] = row_cmat_indices[min_loc];
2304         }
2305         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2306       }
2307       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2308       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2309     }
2310   }
2311   *n_constraints = n;
2312   if (constraints_idx) *constraints_idx = constraints_index;
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 /* the next two functions has been adapted from pcis.c */
2317 #undef __FUNCT__
2318 #define __FUNCT__ "PCBDDCApplySchur"
2319 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2320 {
2321   PetscErrorCode ierr;
2322   PC_IS          *pcis = (PC_IS*)(pc->data);
2323 
2324   PetscFunctionBegin;
2325   if (!vec2_B) { vec2_B = v; }
2326   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2327   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2328   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2329   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2330   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333 
2334 #undef __FUNCT__
2335 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2336 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2337 {
2338   PetscErrorCode ierr;
2339   PC_IS          *pcis = (PC_IS*)(pc->data);
2340 
2341   PetscFunctionBegin;
2342   if (!vec2_B) { vec2_B = v; }
2343   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2344   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2345   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2346   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2347   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "PCBDDCSubsetNumbering"
2353 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2354 {
2355   Vec            local_vec,global_vec;
2356   IS             seqis,paris;
2357   VecScatter     scatter_ctx;
2358   PetscScalar    *array;
2359   PetscInt       *temp_global_dofs;
2360   PetscScalar    globalsum;
2361   PetscInt       i,j,s;
2362   PetscInt       nlocals,first_index,old_index,max_local;
2363   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2364   PetscMPIInt    *dof_sizes,*dof_displs;
2365   PetscBool      first_found;
2366   PetscErrorCode ierr;
2367 
2368   PetscFunctionBegin;
2369   /* mpi buffers */
2370   MPI_Comm_size(comm,&size_prec_comm);
2371   MPI_Comm_rank(comm,&rank_prec_comm);
2372   j = ( !rank_prec_comm ? size_prec_comm : 0);
2373   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2374   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2375   /* get maximum size of subset */
2376   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2377   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2378   max_local = 0;
2379   if (n_local_dofs) {
2380     max_local = temp_global_dofs[0];
2381     for (i=1;i<n_local_dofs;i++) {
2382       if (max_local < temp_global_dofs[i] ) {
2383         max_local = temp_global_dofs[i];
2384       }
2385     }
2386   }
2387   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2388   max_global++;
2389   max_local = 0;
2390   if (n_local_dofs) {
2391     max_local = local_dofs[0];
2392     for (i=1;i<n_local_dofs;i++) {
2393       if (max_local < local_dofs[i] ) {
2394         max_local = local_dofs[i];
2395       }
2396     }
2397   }
2398   max_local++;
2399   /* allocate workspace */
2400   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2401   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2402   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2403   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2404   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2405   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2406   /* create scatter */
2407   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2408   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2409   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2410   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2411   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2412   /* init array */
2413   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2414   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2415   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2416   if (local_dofs_mult) {
2417     for (i=0;i<n_local_dofs;i++) {
2418       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2419     }
2420   } else {
2421     for (i=0;i<n_local_dofs;i++) {
2422       array[local_dofs[i]]=1.0;
2423     }
2424   }
2425   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2426   /* scatter into global vec and get total number of global dofs */
2427   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2428   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2430   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2431   /* Fill global_vec with cumulative function for global numbering */
2432   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2433   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2434   nlocals = 0;
2435   first_index = -1;
2436   first_found = PETSC_FALSE;
2437   for (i=0;i<s;i++) {
2438     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2439       first_found = PETSC_TRUE;
2440       first_index = i;
2441     }
2442     nlocals += (PetscInt)PetscRealPart(array[i]);
2443   }
2444   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2445   if (!rank_prec_comm) {
2446     dof_displs[0]=0;
2447     for (i=1;i<size_prec_comm;i++) {
2448       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2449     }
2450   }
2451   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2452   if (first_found) {
2453     array[first_index] += (PetscScalar)nlocals;
2454     old_index = first_index;
2455     for (i=first_index+1;i<s;i++) {
2456       if (PetscRealPart(array[i]) > 0.0) {
2457         array[i] += array[old_index];
2458         old_index = i;
2459       }
2460     }
2461   }
2462   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2463   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2464   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2465   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466   /* get global ordering of local dofs */
2467   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2468   if (local_dofs_mult) {
2469     for (i=0;i<n_local_dofs;i++) {
2470       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2471     }
2472   } else {
2473     for (i=0;i<n_local_dofs;i++) {
2474       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2475     }
2476   }
2477   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2478   /* free workspace */
2479   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2480   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2481   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2482   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2483   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2484   /* return pointer to global ordering of local dofs */
2485   *global_numbering_subset = temp_global_dofs;
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2491 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2492 {
2493   PetscInt       i,j;
2494   PetscScalar    *alphas;
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   /* this implements stabilized Gram-Schmidt */
2499   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2500   for (i=0;i<n;i++) {
2501     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2502     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2503     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2504   }
2505   ierr = PetscFree(alphas);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 /* TODO
2510    - now preallocation is done assuming SEQDENSE local matrices
2511 */
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatISGetMPIXAIJ"
2514 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2515 {
2516   Mat                    new_mat;
2517   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2518   /* info on mat */
2519   /* ISLocalToGlobalMapping rmapping,cmapping; */
2520   PetscInt               bs,rows,cols;
2521   PetscInt               lrows,lcols;
2522   PetscInt               local_rows,local_cols;
2523   PetscBool              isdense;
2524   /* values insertion */
2525   PetscScalar            *array;
2526   PetscInt               *local_indices,*global_indices;
2527   /* work */
2528   PetscInt               i,j,index_row;
2529   PetscErrorCode         ierr;
2530 
2531   PetscFunctionBegin;
2532   /* MISSING CHECKS
2533     - rectangular case not covered (it is not allowed by MATIS)
2534   */
2535   /* get info from mat */
2536   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2537   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2538   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2539   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2540 
2541   /* work */
2542   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2543   for (i=0;i<local_rows;i++) local_indices[i]=i;
2544   /* map indices of local mat to global values */
2545   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2546   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2547   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2548 
2549   if (reuse==MAT_INITIAL_MATRIX) {
2550     Vec         vec_dnz,vec_onz;
2551     PetscScalar *my_dnz,*my_onz;
2552     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2553     PetscInt    index_col,owner;
2554     PetscMPIInt nsubdomains;
2555 
2556     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2557     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2558     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2559     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2560     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2561     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2562     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2563 
2564     /*
2565       preallocation
2566     */
2567 
2568     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2569     /*
2570        Some vectors are needed to sum up properly on shared interface dofs.
2571        Preallocation macros cannot do the job.
2572        Note that preallocation is not exact, since it overestimates nonzeros
2573     */
2574     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2575     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2576     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2577     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2578     /* All processes need to compute entire row ownership */
2579     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2580     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2581     for (i=0;i<nsubdomains;i++) {
2582       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2583         row_ownership[j]=i;
2584       }
2585     }
2586 
2587     /*
2588        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2589        then, they will be summed up properly. This way, preallocation is always sufficient
2590     */
2591     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2592     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2593     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2594     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2595     for (i=0;i<local_rows;i++) {
2596       index_row = global_indices[i];
2597       for (j=i;j<local_rows;j++) {
2598         owner = row_ownership[index_row];
2599         index_col = global_indices[j];
2600         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2601           my_dnz[i] += 1.0;
2602         } else { /* offdiag block */
2603           my_onz[i] += 1.0;
2604         }
2605         /* same as before, interchanging rows and cols */
2606         if (i != j) {
2607           owner = row_ownership[index_col];
2608           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2609             my_dnz[j] += 1.0;
2610           } else {
2611             my_onz[j] += 1.0;
2612           }
2613         }
2614       }
2615     }
2616     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2617     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2618     if (local_rows) { /* multilevel guard */
2619       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2620       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2621     }
2622     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2623     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2624     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2625     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2626     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2627     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2628     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2629 
2630     /* set computed preallocation in dnz and onz */
2631     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2632     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2633     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2634     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2635     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2636     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2637     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2638     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2639 
2640     /* Resize preallocation if overestimated */
2641     for (i=0;i<lrows;i++) {
2642       dnz[i] = PetscMin(dnz[i],lcols);
2643       onz[i] = PetscMin(onz[i],cols-lcols);
2644     }
2645     /* set preallocation */
2646     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2647     for (i=0;i<lrows/bs;i++) {
2648       dnz[i] = dnz[i*bs]/bs;
2649       onz[i] = onz[i*bs]/bs;
2650     }
2651     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2652     for (i=0;i<lrows/bs;i++) {
2653       dnz[i] = dnz[i]-i;
2654     }
2655     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2656     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2657     *M = new_mat;
2658   } else {
2659     PetscInt mbs,mrows,mcols;
2660     /* some checks */
2661     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2662     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2663     if (mrows != rows) {
2664       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2665     }
2666     if (mrows != rows) {
2667       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2668     }
2669     if (mbs != bs) {
2670       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2671     }
2672     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2673   }
2674   /* set local to global mappings */
2675   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2676   /* Set values */
2677   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2678   if (isdense) { /* special case for dense local matrices */
2679     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2680     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2681     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2682     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2683     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2684     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2685   } else { /* very basic values insertion for all other matrix types */
2686     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2687     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2688     for (i=0;i<local_rows;i++) {
2689       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2690       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2691       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2692       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2693       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2694       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2695     }
2696   }
2697   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2698   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699   if (isdense) {
2700     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2701   }
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatISSubassemble_Private"
2707 PetscErrorCode MatISSubassemble_Private(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2708 {
2709   Mat             subdomain_adj;
2710   IS              new_ranks,ranks_send_to;
2711   MatPartitioning partitioner;
2712   Mat_IS          *matis;
2713   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2714   PetscInt        prank;
2715   PetscMPIInt     size,rank,color;
2716   PetscInt        *xadj,*adjncy,*oldranks;
2717   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2718   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2719   PetscErrorCode  ierr;
2720   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2721   PetscSubcomm    subcomm;
2722 
2723   PetscFunctionBegin;
2724   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2725   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2726   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2727 
2728   /* Get info on mapping */
2729   matis = (Mat_IS*)(mat->data);
2730   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2731   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2732 
2733   /* build local CSR graph of subdomains' connectivity */
2734   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2735   xadj[0] = 0;
2736   xadj[1] = PetscMax(n_neighs-1,0);
2737   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2738   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2739 
2740   if (threshold) {
2741     PetscInt* count,min_threshold;
2742     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2743     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2744     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2745       for (j=0;j<n_shared[i];j++) {
2746         count[shared[i][j]] += 1;
2747       }
2748     }
2749     /* adapt threshold since we dont want to lose significant connections */
2750     min_threshold = n_neighs;
2751     for (i=1;i<n_neighs;i++) {
2752       for (j=0;j<n_shared[i];j++) {
2753         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2754       }
2755     }
2756     threshold = PetscMax(min_threshold+1,threshold);
2757     xadj[1] = 0;
2758     for (i=1;i<n_neighs;i++) {
2759       for (j=0;j<n_shared[i];j++) {
2760         if (count[shared[i][j]] < threshold) {
2761           adjncy[xadj[1]] = neighs[i];
2762           adjncy_wgt[xadj[1]] = n_shared[i];
2763           xadj[1]++;
2764           break;
2765         }
2766       }
2767     }
2768     ierr = PetscFree(count);CHKERRQ(ierr);
2769   } else {
2770     if (xadj[1]) {
2771       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2772       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2773     }
2774   }
2775   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2776   if (use_square) {
2777     for (i=0;i<xadj[1];i++) {
2778       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2779     }
2780   }
2781   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2782 
2783   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2784 
2785   /*
2786     Restrict work on active processes only.
2787   */
2788   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2789   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2790   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2791   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2792   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2793   if (color) {
2794     ierr = PetscFree(xadj);CHKERRQ(ierr);
2795     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2796     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2797   } else {
2798     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2799     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2800     prank = rank;
2801     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2802     for (i=0;i<size;i++) {
2803       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2804     }
2805     for (i=0;i<xadj[1];i++) {
2806       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2807     }
2808     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2809     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2810     n_subdomains = (PetscInt)size;
2811     ierr = MatView(subdomain_adj,0);CHKERRQ(ierr);
2812 
2813     /* Partition */
2814     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2815     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2816     if (use_vwgt) {
2817       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2818       v_wgt[0] = local_size;
2819       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2820     }
2821     ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2822     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2823     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2824     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2825     ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr);
2826 
2827     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2828     ranks_send_to_idx[0] = oldranks[is_indices[0]];
2829     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2830     /* clean up */
2831     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2832     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2833     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2834     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2835   }
2836   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2837 
2838   /* assemble parallel IS for sends */
2839   i = 1;
2840   if (color) i=0;
2841   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2842   ierr = ISView(ranks_send_to,0);CHKERRQ(ierr);
2843 
2844   /* get back IS */
2845   *is_sends = ranks_send_to;
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatISSubassemble"
2853 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, Mat *mat_n)
2854 {
2855   Mat                    local_mat,new_mat;
2856   Mat_IS                 *matis;
2857   IS                     is_sends_internal;
2858   PetscInt               rows,cols;
2859   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2860   PetscBool              ismatis,isdense;
2861   ISLocalToGlobalMapping l2gmap;
2862   PetscInt*              l2gmap_indices;
2863   const PetscInt*        is_indices;
2864   MatType                new_local_type;
2865   MatTypePrivate         new_local_type_private;
2866   /* buffers */
2867   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2868   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2869   /* MPI */
2870   MPI_Comm               comm;
2871   PetscMPIInt            n_sends,n_recvs,commsize;
2872   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2873   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2874   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2875   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2876   PetscErrorCode         ierr;
2877 
2878   PetscFunctionBegin;
2879   /* checks */
2880   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2881   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2882   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2883   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2884   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2885   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2886   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2887   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2888   PetscValidLogicalCollectiveInt(mat,bs,0);
2889   /* prepare IS for sending if not provided */
2890   if (!is_sends) {
2891     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2892     ierr = MatISSubassemble_Private(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2893   } else {
2894     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2895     is_sends_internal = is_sends;
2896   }
2897 
2898   /* get pointer of MATIS data */
2899   matis = (Mat_IS*)mat->data;
2900 
2901   /* get comm */
2902   comm = PetscObjectComm((PetscObject)mat);
2903 
2904   /* compute number of sends */
2905   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2906   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2907 
2908   /* compute number of receives */
2909   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2910   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2911   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2912   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2913   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2914   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2915   ierr = PetscFree(iflags);CHKERRQ(ierr);
2916 
2917   /* prepare send/receive buffers */
2918   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2919   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2920   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2921   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2922 
2923   /* Get data from local mat */
2924   if (!isdense) {
2925     /* TODO: See below some guidelines on how to prepare the local buffers */
2926     /*
2927        send_buffer_vals should contain the raw values of the local matrix
2928        send_buffer_idxs should contain:
2929        - MatType_PRIVATE type
2930        - PetscInt        size_of_l2gmap
2931        - PetscInt        global_row_indices[size_of_l2gmap]
2932        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2933     */
2934   } else {
2935     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2936     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2937     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2938     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2939     send_buffer_idxs[1] = i;
2940     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2941     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2942     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2943     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2944     for (i=0;i<n_sends;i++) {
2945       ilengths_vals[is_indices[i]] = len*len;
2946       ilengths_idxs[is_indices[i]] = len+2;
2947     }
2948   }
2949   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2950   buf_size_idxs = 0;
2951   buf_size_vals = 0;
2952   for (i=0;i<n_recvs;i++) {
2953     buf_size_idxs += (PetscInt)olengths_idxs[i];
2954     buf_size_vals += (PetscInt)olengths_vals[i];
2955   }
2956   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2957   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2958 
2959   /* get new tags for clean communications */
2960   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2961   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2962 
2963   /* allocate for requests */
2964   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2965   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2966   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2967   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2968 
2969   /* communications */
2970   ptr_idxs = recv_buffer_idxs;
2971   ptr_vals = recv_buffer_vals;
2972   for (i=0;i<n_recvs;i++) {
2973     source_dest = onodes[i];
2974     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2975     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2976     ptr_idxs += olengths_idxs[i];
2977     ptr_vals += olengths_vals[i];
2978   }
2979   for (i=0;i<n_sends;i++) {
2980     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2981     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2982     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2983   }
2984   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2985   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2986 
2987   /* assemble new l2g map */
2988   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
2989   ptr_idxs = recv_buffer_idxs;
2990   buf_size_idxs = 0;
2991   for (i=0;i<n_recvs;i++) {
2992     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
2993     ptr_idxs += olengths_idxs[i];
2994   }
2995   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
2996   ptr_idxs = recv_buffer_idxs;
2997   buf_size_idxs = 0;
2998   for (i=0;i<n_recvs;i++) {
2999     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3000     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3001     ptr_idxs += olengths_idxs[i];
3002   }
3003   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3004   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3005   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3006 
3007   /* infer new local matrix type from received local matrices type */
3008   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3009   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3010   new_local_type_private = MATAIJ_PRIVATE;
3011   new_local_type = MATSEQAIJ;
3012   if (n_recvs) {
3013     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3014     ptr_idxs = recv_buffer_idxs;
3015     for (i=0;i<n_recvs;i++) {
3016       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3017         new_local_type_private = MATAIJ_PRIVATE;
3018         break;
3019       }
3020       ptr_idxs += olengths_idxs[i];
3021     }
3022     switch (new_local_type_private) {
3023       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3024         new_local_type = MATSEQAIJ;
3025         bs = 1;
3026         break;
3027       case MATAIJ_PRIVATE:
3028         new_local_type = MATSEQAIJ;
3029         bs = 1;
3030         break;
3031       case MATBAIJ_PRIVATE:
3032         new_local_type = MATSEQBAIJ;
3033         break;
3034       case MATSBAIJ_PRIVATE:
3035         new_local_type = MATSEQSBAIJ;
3036         break;
3037       default:
3038         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3039         break;
3040     }
3041   }
3042 
3043   /* create MATIS object */
3044   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3045   ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,&new_mat);CHKERRQ(ierr);
3046   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3047   ierr = MatISGetLocalMat(new_mat,&local_mat);CHKERRQ(ierr);
3048   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3049   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3050 
3051   /* set values */
3052   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3053   ptr_vals = recv_buffer_vals;
3054   ptr_idxs = recv_buffer_idxs;
3055   for (i=0;i<n_recvs;i++) {
3056     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3057       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3058       ierr = MatSetValues(new_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3059       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3060       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3061       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3062     }
3063     ptr_idxs += olengths_idxs[i];
3064     ptr_vals += olengths_vals[i];
3065   }
3066   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   ierr = MatAssemblyBegin(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3069   ierr = MatAssemblyEnd(new_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3070 
3071   { /* check */
3072     Vec       lvec,rvec;
3073     PetscReal infty_error;
3074 
3075     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3076     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3077     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3078     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3079     ierr = MatMultAdd(new_mat,rvec,lvec,lvec);CHKERRQ(ierr);
3080     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3081     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3082     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3083     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3084   }
3085 
3086   /* free workspace */
3087   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3088   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3089   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3090   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3091   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3092   if (isdense) {
3093     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3094     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3095   } else {
3096     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3097   }
3098   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3099   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3100   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3101   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3102   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3103   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3104   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3105   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3106   ierr = PetscFree(onodes);CHKERRQ(ierr);
3107   /* get back new mat */
3108   *mat_n = new_mat;
3109   PetscFunctionReturn(0);
3110 }
3111 
3112 #undef __FUNCT__
3113 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3114 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3115 {
3116   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3117   PC_IS                  *pcis = (PC_IS*)pc->data;
3118   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3119   MatNullSpace           CoarseNullSpace=NULL;
3120   ISLocalToGlobalMapping coarse_islg;
3121   IS                     coarse_is;
3122   PetscInt               max_it;
3123   PetscInt               im_active=-1,active_procs=-1;
3124   PC                     pc_temp;
3125   PCType                 coarse_pc_type;
3126   KSPType                coarse_ksp_type;
3127   PetscBool              multilevel_requested,multilevel_allowed;
3128   PetscBool              setsym,issym,isbddc,isnn,coarse_reuse;
3129   MatStructure           matstruct;
3130   PetscErrorCode         ierr;
3131 
3132   PetscFunctionBegin;
3133   /* Assign global numbering to coarse dofs */
3134   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3135     PetscInt ocoarse_size;
3136     ocoarse_size = pcbddc->coarse_size;
3137     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3138     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3139     /* see if we can avoid some work */
3140     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3141       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3142         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3143         coarse_reuse = PETSC_FALSE;
3144       } else { /* we can safely reuse already computed coarse matrix */
3145         coarse_reuse = PETSC_TRUE;
3146       }
3147     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3148       coarse_reuse = PETSC_FALSE;
3149     }
3150   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3151     coarse_reuse = PETSC_TRUE;
3152   }
3153 
3154   /* infer some info from user */
3155   issym = PETSC_FALSE;
3156   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3157   multilevel_allowed = PETSC_FALSE;
3158   multilevel_requested = PETSC_FALSE;
3159   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3160   if (multilevel_requested) {
3161     /* count "active processes" */
3162     im_active = !!(pcis->n);
3163     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3164     if (active_procs/pcbddc->coarsening_ratio < 2) {
3165       multilevel_allowed = PETSC_FALSE;
3166     } else {
3167       multilevel_allowed = PETSC_TRUE;
3168     }
3169   }
3170 
3171   /* set defaults for coarse KSP and PC */
3172   if (multilevel_allowed) {
3173     if (issym) {
3174       coarse_ksp_type = KSPRICHARDSON;
3175     } else {
3176       coarse_ksp_type = KSPCHEBYSHEV;
3177     }
3178     coarse_pc_type = PCBDDC;
3179   } else {
3180     coarse_ksp_type = KSPPREONLY;
3181     coarse_pc_type = PCREDUNDANT;
3182   }
3183 
3184   /* create the coarse KSP object only once with defaults */
3185   if (!pcbddc->coarse_ksp) {
3186     char prefix[256],str_level[3];
3187     size_t len;
3188     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3189     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3190     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3191     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3192     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3193     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3194     /* prefix */
3195     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3196     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3197     if (!pcbddc->current_level) {
3198       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3199       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3200     } else {
3201       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3202       if (pcbddc->current_level>1) len -= 2;
3203       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3204       *(prefix+len)='\0';
3205       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3206       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3207     }
3208     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3209   }
3210   /* allow user customization */
3211   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3212   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3213   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3214 
3215   /* get some info after set from options */
3216   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3217   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3218   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3219   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3220     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3221     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3222     isbddc = PETSC_FALSE;
3223   }
3224 
3225   /* propagate BDDC info to the next level */
3226   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3227   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3228   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3229 
3230   /* creates temporary MATIS object for coarse matrix */
3231   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3232   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3233   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3234   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3235   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3236   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3239   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3240 
3241   /* assemble coarse matrix */
3242   if (isbddc || isnn) {
3243     ierr = MatISSubassemble(coarse_mat_is,NULL,pcbddc->coarsening_ratio,&coarse_mat);CHKERRQ(ierr);
3244   } else {
3245     if (coarse_reuse) {
3246       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3247       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3248       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3249     } else {
3250       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3251     }
3252   }
3253   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3254 
3255   /* create local to global scatters for coarse problem */
3256   if (pcbddc->new_primal_space) {
3257     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3258     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3259     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3260     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3261     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3262   }
3263   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3264 
3265   /* propagate symmetry info to coarse matrix */
3266   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3267 
3268   /* Compute coarse null space (special handling by BDDC only) */
3269   if (pcbddc->NullSpace) {
3270     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3271     if (isbddc) {
3272       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3273     } else {
3274       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3275     }
3276   }
3277 
3278   /* set operators */
3279   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3280   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3281 
3282   /* additional KSP customization */
3283   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3284   if (max_it < 5) {
3285     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3286   }
3287   /* ierr = KSPChebyshevSetEstimateEigenvalues(pcbddc->coarse_ksp,1.0,0.0,0.0,1.1);CHKERRQ(ierr); */
3288 
3289 
3290   /* print some info if requested */
3291   if (pcbddc->dbg_flag) {
3292     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3293     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3294     if (!multilevel_allowed) {
3295       if (multilevel_requested) {
3296         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3297       } else if (pcbddc->max_levels) {
3298         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3299       }
3300     }
3301     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3302     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3303   }
3304 
3305   /* setup coarse ksp */
3306   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3307   if (pcbddc->dbg_flag) {
3308     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3309     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3310     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3311     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3312   }
3313 
3314   /* Check coarse problem if requested */
3315   if (pcbddc->dbg_flag) {
3316     KSP       check_ksp;
3317     KSPType   check_ksp_type;
3318     PC        check_pc;
3319     Vec       check_vec;
3320     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3321     PetscInt  its;
3322     PetscBool ispreonly,compute;
3323 
3324     /* Create ksp object suitable for estimation of extreme eigenvalues */
3325     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3326     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3327     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3328     ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3329     if (ispreonly) {
3330       check_ksp_type = KSPPREONLY;
3331       compute = PETSC_FALSE;
3332     } else {
3333       if (issym) check_ksp_type = KSPCG;
3334       else check_ksp_type = KSPGMRES;
3335       compute = PETSC_TRUE;
3336     }
3337     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3338     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3339     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3340     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3341     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3342     /* create random vec */
3343     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3344     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3345     if (CoarseNullSpace) {
3346       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3347     }
3348     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3349     /* solve coarse problem */
3350     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3351     if (CoarseNullSpace) {
3352       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3353     }
3354     /* check coarse problem residual error */
3355     ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3356     ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3357     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3358     ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3359     ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3360     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3361     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3362     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3363     /* get eigenvalue estimation if preonly has not been requested */
3364     if (!ispreonly) {
3365       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3366       ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3367       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3368     }
3369     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3370     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3371   }
3372   /* free memory */
3373   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3374   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3375   PetscFunctionReturn(0);
3376 }
3377 
3378 #undef __FUNCT__
3379 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3380 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3381 {
3382   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3383   PC_IS*         pcis = (PC_IS*)pc->data;
3384   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3385   PetscInt       i,coarse_size;
3386   PetscInt       *local_primal_indices;
3387   PetscErrorCode ierr;
3388 
3389   PetscFunctionBegin;
3390   /* Compute global number of coarse dofs */
3391   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3392     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3393   }
3394   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3395 
3396   /* check numbering */
3397   if (pcbddc->dbg_flag) {
3398     PetscScalar coarsesum,*array;
3399 
3400     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3401     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3402     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3403     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3404     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3405     for (i=0;i<pcbddc->local_primal_size;i++) {
3406       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3407     }
3408     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3409     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3410     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3411     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3412     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3413     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3414     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3415     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3416     for (i=0;i<pcis->n;i++) {
3417       if (array[i] == 1.0) {
3418         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3419       }
3420     }
3421     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3422     for (i=0;i<pcis->n;i++) {
3423       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3424     }
3425     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3426     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3427     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3428     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3429     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3430     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3431     if (pcbddc->dbg_flag > 1) {
3432       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3433       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3434       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3435       for (i=0;i<pcbddc->local_primal_size;i++) {
3436         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3437       }
3438       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3439     }
3440     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3441   }
3442   /* get back data */
3443   *coarse_size_n = coarse_size;
3444   *local_primal_indices_n = local_primal_indices;
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 #undef __FUNCT__
3449 #define __FUNCT__ "PCBDDCGlobalToLocal"
3450 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3451 {
3452   PC_IS*         pcis = (PC_IS*)pc->data;
3453   IS             localis_t;
3454   PetscInt       i,lsize,*idxs;
3455   PetscScalar    *vals;
3456   PetscErrorCode ierr;
3457 
3458   PetscFunctionBegin;
3459   /* get dirichlet indices in local ordering exploiting local to global map */
3460   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3461   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3462   for (i=0;i<lsize;i++) vals[i] = 1.0;
3463   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3464   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3465   ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3466   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3467   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3468   ierr = PetscFree(vals);CHKERRQ(ierr);
3469   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3470   /* now compute dirichlet set in local ordering */
3471   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3472   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3473   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3474   for (i=0,lsize=0;i<pcis->n;i++) {
3475     if (vals[i] == 1.0) {
3476       lsize++;
3477     }
3478   }
3479   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3480   for (i=0,lsize=0;i<pcis->n;i++) {
3481     if (vals[i] == 1.0) {
3482       idxs[lsize++] = i;
3483     }
3484   }
3485   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3486   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3487   *localis = localis_t;
3488   PetscFunctionReturn(0);
3489 }
3490