xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision 22b6e8a233cc850461988c728ea63b6b43cfb65b)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_constraints,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
130   n_R = pcis->n-pcbddc->n_actual_vertices;
131   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
132   /* local work vectors (try to avoid unneeded work)*/
133   /* R nodes */
134   old_size = -1;
135   if (pcbddc->vec1_R) {
136     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
137   }
138   if (n_R != old_size) {
139     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
140     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
141     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
143     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
144     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
145   }
146   /* local primal dofs */
147   old_size = -1;
148   if (pcbddc->vec1_P) {
149     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
150   }
151   if (pcbddc->local_primal_size != old_size) {
152     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
153     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
155     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
156   }
157   /* local explicit constraints */
158   old_size = -1;
159   if (pcbddc->vec1_C) {
160     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
161   }
162   if (n_constraints && n_constraints != old_size) {
163     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
164     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
166     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCBDDCSetUpCorrection"
173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
174 {
175   PetscErrorCode         ierr;
176   /* pointers to pcis and pcbddc */
177   PC_IS*                 pcis = (PC_IS*)pc->data;
178   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
179   /* submatrices of local problem */
180   Mat                    A_RV,A_VR,A_VV;
181   /* working matrices */
182   Mat                    M1,M2,M3,C_CR;
183   /* working vectors */
184   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
185   /* additional working stuff */
186   IS                     is_aux;
187   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
188   const PetscScalar      *array,*row_cmat_values;
189   const PetscInt         *row_cmat_indices,*idx_R_local;
190   PetscInt               *idx_V_B,*auxindices;
191   PetscInt               n_vertices,n_constraints,size_of_constraint;
192   PetscInt               i,j,n_R,n_D,n_B;
193   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
194   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
195   MatType                impMatType;
196   /* some shortcuts to scalars */
197   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
198   /* for debugging purposes */
199   PetscReal              *coarsefunctions_errors,*constraints_errors;
200 
201   PetscFunctionBegin;
202   /* get number of vertices (corners plus constraints with change of basis)
203      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
204   n_vertices = pcbddc->n_actual_vertices;
205   n_constraints = pcbddc->local_primal_size-n_vertices;
206   /* Set Non-overlapping dimensions */
207   n_B = pcis->n_B; n_D = pcis->n - n_B;
208   n_R = pcis->n-n_vertices;
209 
210   /* Set types for local objects needed by BDDC precondtioner */
211   impMatType = MATSEQDENSE;
212 
213   /* Allocating some extra storage just to be safe */
214   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
215   for (i=0;i<pcis->n;i++) auxindices[i]=i;
216 
217   /* vertices in boundary numbering */
218   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
219   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
220   if (i != n_vertices) {
221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
222   }
223 
224   /* Precompute stuffs needed for preprocessing and application of BDDC*/
225   if (n_constraints) {
226     /* see if we can save some allocations */
227     if (pcbddc->local_auxmat2) {
228       PetscInt on_R,on_constraints;
229       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
230       if (on_R != n_R || on_constraints != n_constraints) {
231         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
232         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
233       }
234     }
235     /* work vectors */
236     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
238     /* auxiliary matrices */
239     if (!pcbddc->local_auxmat2) {
240       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
241       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
242       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
243       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
244     }
245 
246     /* Extract constraints on R nodes: C_{CR}  */
247     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
248     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
249     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
250 
251     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
252     for (i=0;i<n_constraints;i++) {
253       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
254       /* Get row of constraint matrix in R numbering */
255       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
256       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
257       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
259       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
260       /* Solve for row of constraint matrix in R numbering */
261       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
262       /* Set values in local_auxmat2 */
263       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
264       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
265       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266     }
267     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
268     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
270 
271     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
272     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
273     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
274     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
275     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
276     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
277     ierr = MatSetUp(M1);CHKERRQ(ierr);
278     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
279     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
280     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
281     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
282     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
283     ierr = MatDestroy(&M2);CHKERRQ(ierr);
284     ierr = MatDestroy(&M3);CHKERRQ(ierr);
285     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
286     if (!pcbddc->local_auxmat1) {
287       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
288     } else {
289       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     }
291   }
292 
293   /* Get submatrices from subdomain matrix */
294   if (n_vertices) {
295     PetscInt ibs,mbs;
296     PetscBool issbaij;
297     Mat newmat;
298 
299     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
300     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
301     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
302     if (ibs != mbs) { /* need to convert to SEQAIJ */
303       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
304       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
307       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
308     } else {
309       /* this is safe */
310       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
312       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
313         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
314         /* which of the two approaches is faster? */
315         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
316         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
317         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
319         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
320       } else {
321         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
322         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323       }
324     }
325     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
326     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
327     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
328   }
329 
330   /* Matrix of coarse basis functions (local) */
331   if (pcbddc->coarse_phi_B) {
332     PetscInt on_B,on_primal;
333     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
334     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
335       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
336       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
337     }
338   }
339   if (pcbddc->coarse_phi_D) {
340     PetscInt on_D,on_primal;
341     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
342     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
343       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
344       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
345     }
346   }
347   if (!pcbddc->coarse_phi_B) {
348     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
349     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
350     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
351     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
352   }
353   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
354     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
355     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
356     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
357     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
358   }
359 
360   if (pcbddc->dbg_flag) {
361     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
362     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
364   }
365   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
366   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
367 
368   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
369 
370   /* vertices */
371   for (i=0;i<n_vertices;i++) {
372     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
373     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
374     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
375     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
376     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
377     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
378     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
379     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
382     if (n_constraints) {
383       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
384       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
386     }
387     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
388     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
389 
390     /* Set values in coarse basis function and subdomain part of coarse_mat */
391     /* coarse basis functions */
392     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
393     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
394     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
396     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
397     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
399     if (pcbddc->switch_static || pcbddc->dbg_flag) {
400       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
403       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405     }
406     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
407     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
408     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
409     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     if (n_constraints) {
411       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
412       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
413       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
414     }
415 
416     /* check */
417     if (pcbddc->dbg_flag) {
418       /* assemble subdomain vector on local nodes */
419       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
421       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
429       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       if (n_constraints) {
432         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
433         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
434         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
435       }
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
449     }
450   }
451 
452   /* constraints */
453   for (i=0;i<n_constraints;i++) {
454     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
455     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
456     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
457     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
458     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
459     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
460     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
461     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
462     if (n_vertices) {
463       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
464     }
465     /* Set values in coarse basis function and subdomain part of coarse_mat */
466     /* coarse basis functions */
467     j = i+n_vertices; /* don't touch this! */
468     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
469     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
472     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
473     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     if (pcbddc->switch_static || pcbddc->dbg_flag) {
475       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
478       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
479       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480     }
481     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
482     if (n_vertices) {
483       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
484       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
485       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
486     }
487     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
488     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
489     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
490 
491     if (pcbddc->dbg_flag) {
492       /* assemble subdomain vector on nodes */
493       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
494       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
495       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
496       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
498       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
499       /* assemble subdomain vector of lagrange multipliers */
500       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
501       if (n_vertices) {
502         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
503         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
504         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
505       }
506       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
507       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
508       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
510       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
512       /* check saddle point solution */
513       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
514       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
516       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
517       /* shift by the identity matrix */
518       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
522     }
523   }
524   /* call assembling routines for local coarse basis */
525   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   if (pcbddc->switch_static || pcbddc->dbg_flag) {
528     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   }
531 
532   /* compute other basis functions for non-symmetric problems */
533   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
534   if (!setsym || (setsym && !issym)) {
535     if (!pcbddc->coarse_psi_B) {
536       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
537       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
538       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
539       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
540     }
541     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
542       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
543       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
544       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
545       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
546     }
547     for (i=0;i<pcbddc->local_primal_size;i++) {
548       if (n_constraints) {
549         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
550         for (j=0;j<n_constraints;j++) {
551           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
552         }
553         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
554         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
555       }
556       if (i<n_vertices) {
557         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
558         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
559         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
560         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
561         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
562         if (n_constraints) {
563           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
564         }
565       } else {
566         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
567       }
568       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
569       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
570       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
573       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
574       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       if (i<n_vertices) {
576         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
577       }
578       if (pcbddc->switch_static || pcbddc->dbg_flag) {
579         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
582         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
583         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584       }
585 
586       if (pcbddc->dbg_flag) {
587         /* assemble subdomain vector on nodes */
588         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
589         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
590         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
591         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         if (i<n_vertices) {
593           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
594         }
595         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
596         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
597         /* assemble subdomain vector of lagrange multipliers */
598         for (j=0;j<pcbddc->local_primal_size;j++) {
599           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
600         }
601         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
602         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
603         /* check saddle point solution */
604         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
605         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
607         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
608         /* shift by the identity matrix */
609         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
610         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
613       }
614     }
615     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     if (pcbddc->switch_static || pcbddc->dbg_flag) {
618       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     }
621   }
622   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
623   /* Checking coarse_sub_mat and coarse basis functios */
624   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
625   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626   if (pcbddc->dbg_flag) {
627     Mat         coarse_sub_mat;
628     Mat         AUXMAT,TM1,TM2,TM3,TM4;
629     Mat         coarse_phi_D,coarse_phi_B;
630     Mat         coarse_psi_D,coarse_psi_B;
631     Mat         A_II,A_BB,A_IB,A_BI;
632     MatType     checkmattype=MATSEQAIJ;
633     PetscReal   real_value;
634 
635     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
636     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
639     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
640     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
641     if (pcbddc->coarse_psi_B) {
642       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
643       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
644     }
645     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
646 
647     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
648     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
649     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
650     if (pcbddc->coarse_psi_B) {
651       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
652       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
653       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
654       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
655       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
656       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
657       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
658       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
659       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
660       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663     } else {
664       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
665       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
667       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
668       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
669       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672     }
673     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
674     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
679     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
684     for (i=0;i<pcbddc->local_primal_size;i++) {
685       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
686     }
687     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
688     for (i=0;i<pcbddc->local_primal_size;i++) {
689       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
690     }
691     if (pcbddc->coarse_psi_B) {
692       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
693       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
694         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
695       }
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
697       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
698         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
699       }
700     }
701     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
702     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
706     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
710     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
711     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
712     if (pcbddc->coarse_psi_B) {
713       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
714       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
715     }
716     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
717     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
718     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
719     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
720   }
721   /* free memory */
722   if (n_vertices) {
723     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
724     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
725     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
728   }
729   if (n_constraints) {
730     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
731     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
732     ierr = MatDestroy(&M1);CHKERRQ(ierr);
733     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
734   }
735   ierr = PetscFree(auxindices);CHKERRQ(ierr);
736   /* get back data */
737   *coarse_submat_vals_n = coarse_submat_vals;
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
743 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
744 {
745   PC_IS*            pcis = (PC_IS*)(pc->data);
746   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
747   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
748   PetscBool         issbaij,isbaij;
749   /* manage repeated solves */
750   MatReuse          reuse;
751   MatStructure      matstruct;
752   PetscErrorCode    ierr;
753 
754   PetscFunctionBegin;
755   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
756     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
757   }
758   /* get mat flags */
759   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
760   reuse = MAT_INITIAL_MATRIX;
761   if (pc->setupcalled) {
762     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
763     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
764     if (matstruct == SAME_NONZERO_PATTERN) {
765       reuse = MAT_REUSE_MATRIX;
766     } else {
767       reuse = MAT_INITIAL_MATRIX;
768     }
769   }
770   if (reuse == MAT_INITIAL_MATRIX) {
771     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
772     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
773     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
774     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
776   }
777 
778   /* transform local matrices if needed */
779   if (pcbddc->use_change_of_basis) {
780     Mat         change_mat_all;
781     PetscScalar *row_cmat_values;
782     PetscInt    *row_cmat_indices;
783     PetscInt    *nnz,*is_indices,*temp_indices;
784     PetscInt    i,j,k,n_D,n_B;
785 
786     /* Get Non-overlapping dimensions */
787     n_B = pcis->n_B;
788     n_D = pcis->n-n_B;
789 
790     /* compute nonzero structure of change of basis on all local nodes */
791     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
792     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
793     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
794     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     k=1;
797     for (i=0;i<n_B;i++) {
798       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
799       nnz[is_indices[i]]=j;
800       if (k < j) k = j;
801       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802     }
803     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
804     /* assemble change of basis matrix on the whole set of local dofs */
805     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
806     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
807     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
808     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
809     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
810     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     for (i=0;i<n_D;i++) {
812       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
813     }
814     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816     for (i=0;i<n_B;i++) {
817       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
818       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
819       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
820       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821     }
822     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
825     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
826     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
827     if (!issbaij && !isbaij) {
828       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
829     } else {
830       Mat work_mat;
831       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
832       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
833       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
834     }
835     /*
836     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
837     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
838     */
839     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
840     ierr = PetscFree(nnz);CHKERRQ(ierr);
841     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
842   } else {
843     /* without change of basis, the local matrix is unchanged */
844     if (!pcbddc->local_mat) {
845       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
846       pcbddc->local_mat = matis->A;
847     }
848   }
849 
850   /* get submatrices */
851   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
852   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
853   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
854   if (!issbaij) {
855     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
856     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
857   } else {
858     Mat newmat;
859     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
861     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
862     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
869 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
870 {
871   PC_IS*         pcis = (PC_IS*)(pc->data);
872   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
873   IS             is_aux1,is_aux2;
874   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
875   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
876   PetscInt       vbs,bs;
877   PetscBT        bitmask;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   /* No need to setup local scatters if primal space is unchanged */
882   if (!pcbddc->new_primal_space_local && !pcbddc->dbg_flag) {
883     PetscFunctionReturn(0);
884   }
885   /* destroy old objects */
886   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
887   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
888   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
889   /* Set Non-overlapping dimensions */
890   n_B = pcis->n_B; n_D = pcis->n - n_B;
891   n_vertices = pcbddc->n_actual_vertices;
892   /* create auxiliary bitmask */
893   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
894   for (i=0;i<n_vertices;i++) {
895     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
896   }
897 
898   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
899   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
900   for (i=0, n_R=0; i<pcis->n; i++) {
901     if (!PetscBTLookup(bitmask,i)) {
902       idx_R_local[n_R] = i;
903       n_R++;
904     }
905   }
906 
907   /* Block code */
908   vbs = 1;
909   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
910   if (bs>1 && !(n_vertices%bs)) {
911     PetscBool is_blocked = PETSC_TRUE;
912     PetscInt  *vary;
913     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
914     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
915     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
916     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
917     for (i=0; i<n_vertices; i++) {
918       if (vary[i]!=0 && vary[i]!=bs) {
919         is_blocked = PETSC_FALSE;
920         break;
921       }
922     }
923     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
924       vbs = bs;
925       for (i=0;i<n_R/vbs;i++) {
926         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
927       }
928     }
929     ierr = PetscFree(vary);CHKERRQ(ierr);
930   }
931   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
932   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
933 
934   /* print some info if requested */
935   if (pcbddc->dbg_flag) {
936     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
937     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
938     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
939     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
940     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
941     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
942     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
943     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
944   }
945 
946   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
947   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
948   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
949   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
950   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
951   for (i=0; i<n_D; i++) {
952     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
953   }
954   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
955   for (i=0, j=0; i<n_R; i++) {
956     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
957       aux_array1[j++] = i;
958     }
959   }
960   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
961   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
962   for (i=0, j=0; i<n_B; i++) {
963     if (!PetscBTLookup(bitmask,is_indices[i])) {
964       aux_array2[j++] = i;
965     }
966   }
967   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
968   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
969   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
970   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
971   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
972 
973   if (pcbddc->switch_static || pcbddc->dbg_flag) {
974     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
975     for (i=0, j=0; i<n_R; i++) {
976       if (PetscBTLookup(bitmask,idx_R_local[i])) {
977         aux_array1[j++] = i;
978       }
979     }
980     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
981     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
982     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
983   }
984   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
985   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
992 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
993 {
994   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
995   PC_IS          *pcis = (PC_IS*)pc->data;
996   PC             pc_temp;
997   Mat            A_RR;
998   MatStructure   matstruct;
999   MatReuse       reuse;
1000   PetscScalar    m_one = -1.0;
1001   PetscReal      value;
1002   PetscInt       n_D,n_R,ibs,mbs;
1003   PetscBool      use_exact,use_exact_reduced,issbaij;
1004   PetscErrorCode ierr;
1005   /* prefixes stuff */
1006   char           dir_prefix[256],neu_prefix[256],str_level[3];
1007   size_t         len;
1008 
1009   PetscFunctionBegin;
1010   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1011 
1012   /* compute prefixes */
1013   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1014   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1015   if (!pcbddc->current_level) {
1016     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1017     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1018     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1019     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1020   } else {
1021     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1022     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1023     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1024     len -= 15; /* remove "pc_bddc_coarse_" */
1025     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1026     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1027     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1028     *(dir_prefix+len)='\0';
1029     *(neu_prefix+len)='\0';
1030     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1031     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1032     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1033     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1034   }
1035 
1036   /* DIRICHLET PROBLEM */
1037   /* Matrix for Dirichlet problem is pcis->A_II */
1038   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1039   if (!pcbddc->ksp_D) { /* create object if not yet build */
1040     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1041     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1042     /* default */
1043     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1044     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1045     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1046     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1047     if (issbaij) {
1048       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1049     } else {
1050       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1051     }
1052     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1053   }
1054   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1055   /* Allow user's customization */
1056   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1057   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1058   if (!n_D) {
1059     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1060     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1061   }
1062   /* Set Up KSP for Dirichlet problem of BDDC */
1063   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1064   /* set ksp_D into pcis data */
1065   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1066   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1067   pcis->ksp_D = pcbddc->ksp_D;
1068 
1069   /* NEUMANN PROBLEM */
1070   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1071   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1072   if (pcbddc->ksp_R) { /* already created ksp */
1073     PetscInt nn_R;
1074     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1075     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1076     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1077     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1078       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1079       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1080       reuse = MAT_INITIAL_MATRIX;
1081     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1082       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1083         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1084         reuse = MAT_INITIAL_MATRIX;
1085       } else { /* safe to reuse the matrix */
1086         reuse = MAT_REUSE_MATRIX;
1087       }
1088     }
1089     /* last check */
1090     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1091       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1092       reuse = MAT_INITIAL_MATRIX;
1093     }
1094   } else { /* first time, so we need to create the matrix */
1095     reuse = MAT_INITIAL_MATRIX;
1096   }
1097   /* extract A_RR */
1098   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1099   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1100   if (ibs != mbs) {
1101     Mat newmat;
1102     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1103     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1104     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1105   } else {
1106     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1107   }
1108   if (!pcbddc->ksp_R) { /* create object if not present */
1109     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1110     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1111     /* default */
1112     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1113     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1114     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1115     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1116     if (issbaij) {
1117       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1118     } else {
1119       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1120     }
1121     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1122   }
1123   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1124   /* Allow user's customization */
1125   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1126   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1127   if (!n_R) {
1128     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1129     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1130   }
1131   /* Set Up KSP for Neumann problem of BDDC */
1132   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1133 
1134   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1135   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1136     /* Dirichlet */
1137     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1138     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1139     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1140     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1141     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1142     /* need to be adapted? */
1143     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1144     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1145     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1146     /* print info */
1147     if (pcbddc->dbg_flag) {
1148       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1149       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1150       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1151       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1152       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1153       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1154     }
1155     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1156       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1157     }
1158 
1159     /* Neumann */
1160     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1161     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1162     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1163     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1164     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1165     /* need to be adapted? */
1166     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1167     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1168     /* print info */
1169     if (pcbddc->dbg_flag) {
1170       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1171       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1172     }
1173     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1174       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1175     }
1176   }
1177   /* free Neumann problem's matrix */
1178   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1184 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1185 {
1186   PetscErrorCode ierr;
1187   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1188 
1189   PetscFunctionBegin;
1190   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1191   if (pcbddc->local_auxmat1) {
1192     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1193     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1200 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1201 {
1202   PetscErrorCode ierr;
1203   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1204   PC_IS*            pcis = (PC_IS*)  (pc->data);
1205   const PetscScalar zero = 0.0;
1206 
1207   PetscFunctionBegin;
1208   /* Application of PHI^T (or PSI^T)  */
1209   if (pcbddc->coarse_psi_B) {
1210     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1211     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1212   } else {
1213     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1214     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1215   }
1216   /* Scatter data of coarse_rhs */
1217   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1218   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1219 
1220   /* Local solution on R nodes */
1221   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1222   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1223   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   if (pcbddc->switch_static) {
1225     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1226     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1227   }
1228   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1229   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1230   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1232   if (pcbddc->switch_static) {
1233     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235   }
1236 
1237   /* Coarse solution */
1238   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1240     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1241   }
1242   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1243   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1244 
1245   /* Sum contributions from two levels */
1246   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1247   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1253 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1254 {
1255   PetscErrorCode ierr;
1256   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1257 
1258   PetscFunctionBegin;
1259   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1265 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1266 {
1267   PetscErrorCode ierr;
1268   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1269 
1270   PetscFunctionBegin;
1271   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 /* uncomment for testing purposes */
1276 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1277 #undef __FUNCT__
1278 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1279 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1280 {
1281   PetscErrorCode    ierr;
1282   PC_IS*            pcis = (PC_IS*)(pc->data);
1283   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1284   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1285   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1286   MatType           impMatType=MATSEQAIJ;
1287   /* one and zero */
1288   PetscScalar       one=1.0,zero=0.0;
1289   /* space to store constraints and their local indices */
1290   PetscScalar       *temp_quadrature_constraint;
1291   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1292   /* iterators */
1293   PetscInt          i,j,k,total_counts,temp_start_ptr;
1294   /* stuff to store connected components stored in pcbddc->mat_graph */
1295   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1296   PetscInt          n_ISForFaces,n_ISForEdges;
1297   /* near null space stuff */
1298   MatNullSpace      nearnullsp;
1299   const Vec         *nearnullvecs;
1300   Vec               *localnearnullsp;
1301   PetscBool         nnsp_has_cnst;
1302   PetscInt          nnsp_size;
1303   PetscScalar       *array;
1304   /* BLAS integers */
1305   PetscBLASInt      lwork,lierr;
1306   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1307   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1308   /* LAPACK working arrays for SVD or POD */
1309   PetscBool         skip_lapack;
1310   PetscScalar       *work;
1311   PetscReal         *singular_vals;
1312 #if defined(PETSC_USE_COMPLEX)
1313   PetscReal         *rwork;
1314 #endif
1315 #if defined(PETSC_MISSING_LAPACK_GESVD)
1316   PetscBLASInt      Blas_one_2=1;
1317   PetscScalar       *temp_basis,*correlation_mat;
1318 #else
1319   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1320   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1321 #endif
1322   /* reuse */
1323   PetscInt          olocal_primal_size;
1324   PetscInt          *oprimal_indices_local_idxs;
1325   /* change of basis */
1326   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1327   PetscBool         boolforchange,qr_needed;
1328   PetscBT           touched,change_basis,qr_needed_idx;
1329   /* auxiliary stuff */
1330   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1331   /* some quantities */
1332   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1333   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1334 
1335 
1336   PetscFunctionBegin;
1337   /* Destroy Mat objects computed previously */
1338   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1339   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1340   /* Get index sets for faces, edges and vertices from graph */
1341   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1342     pcbddc->use_vertices = PETSC_TRUE;
1343   }
1344   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1345   /* HACK: provide functions to set change of basis */
1346   if (!ISForVertices && pcbddc->NullSpace) {
1347     pcbddc->use_change_of_basis = PETSC_TRUE;
1348     pcbddc->use_change_on_faces = PETSC_FALSE;
1349   }
1350   /* print some info */
1351   if (pcbddc->dbg_flag) {
1352     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1353     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1354     i = 0;
1355     if (ISForVertices) {
1356       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1359     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1361     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1362   }
1363   /* check if near null space is attached to global mat */
1364   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1365   if (nearnullsp) {
1366     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1367     /* remove any stored info */
1368     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1369     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1370     /* store information for BDDC solver reuse */
1371     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1372     pcbddc->onearnullspace = nearnullsp;
1373     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1374     for (i=0;i<nnsp_size;i++) {
1375       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1376     }
1377   } else { /* if near null space is not provided BDDC uses constants by default */
1378     nnsp_size = 0;
1379     nnsp_has_cnst = PETSC_TRUE;
1380   }
1381   /* get max number of constraints on a single cc */
1382   max_constraints = nnsp_size;
1383   if (nnsp_has_cnst) max_constraints++;
1384 
1385   /*
1386        Evaluate maximum storage size needed by the procedure
1387        - temp_indices will contain start index of each constraint stored as follows
1388        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1389        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1390        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1391                                                                                                                                                          */
1392   total_counts = n_ISForFaces+n_ISForEdges;
1393   total_counts *= max_constraints;
1394   n_vertices = 0;
1395   if (ISForVertices) {
1396     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1397   }
1398   total_counts += n_vertices;
1399   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1400   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1401   total_counts = 0;
1402   max_size_of_constraint = 0;
1403   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1404     if (i<n_ISForEdges) {
1405       used_IS = &ISForEdges[i];
1406     } else {
1407       used_IS = &ISForFaces[i-n_ISForEdges];
1408     }
1409     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1410     total_counts += j;
1411     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1412   }
1413   total_counts *= max_constraints;
1414   total_counts += n_vertices;
1415   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1416   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1417   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1418   /* get local part of global near null space vectors */
1419   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1420   for (k=0;k<nnsp_size;k++) {
1421     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1422     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424   }
1425 
1426   /* whether or not to skip lapack calls */
1427   skip_lapack = PETSC_TRUE;
1428   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1429 
1430   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1431   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1432     PetscScalar temp_work;
1433 #if defined(PETSC_MISSING_LAPACK_GESVD)
1434     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1435     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1436     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1437     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1438 #if defined(PETSC_USE_COMPLEX)
1439     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1440 #endif
1441     /* now we evaluate the optimal workspace using query with lwork=-1 */
1442     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1443     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1444     lwork = -1;
1445     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1446 #if !defined(PETSC_USE_COMPLEX)
1447     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1448 #else
1449     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1450 #endif
1451     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1452     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1453 #else /* on missing GESVD */
1454     /* SVD */
1455     PetscInt max_n,min_n;
1456     max_n = max_size_of_constraint;
1457     min_n = max_constraints;
1458     if (max_size_of_constraint < max_constraints) {
1459       min_n = max_size_of_constraint;
1460       max_n = max_constraints;
1461     }
1462     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1463 #if defined(PETSC_USE_COMPLEX)
1464     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1465 #endif
1466     /* now we evaluate the optimal workspace using query with lwork=-1 */
1467     lwork = -1;
1468     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1469     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1470     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1471     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1472 #if !defined(PETSC_USE_COMPLEX)
1473     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1474 #else
1475     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1476 #endif
1477     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1478     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1479 #endif /* on missing GESVD */
1480     /* Allocate optimal workspace */
1481     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1482     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1483   }
1484   /* Now we can loop on constraining sets */
1485   total_counts = 0;
1486   temp_indices[0] = 0;
1487   /* vertices */
1488   if (ISForVertices) {
1489     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1490     if (nnsp_has_cnst) { /* consider all vertices */
1491       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1492       for (i=0;i<n_vertices;i++) {
1493         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1494         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1495         total_counts++;
1496       }
1497     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1498       PetscBool used_vertex;
1499       for (i=0;i<n_vertices;i++) {
1500         used_vertex = PETSC_FALSE;
1501         k = 0;
1502         while (!used_vertex && k<nnsp_size) {
1503           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1504           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1505             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1506             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1507             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1508             total_counts++;
1509             used_vertex = PETSC_TRUE;
1510           }
1511           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1512           k++;
1513         }
1514       }
1515     }
1516     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1517     n_vertices = total_counts;
1518   }
1519 
1520   /* edges and faces */
1521   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1522     if (i<n_ISForEdges) {
1523       used_IS = &ISForEdges[i];
1524       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1525     } else {
1526       used_IS = &ISForFaces[i-n_ISForEdges];
1527       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1528     }
1529     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1530     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1531     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1532     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1533     /* change of basis should not be performed on local periodic nodes */
1534     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1535     if (nnsp_has_cnst) {
1536       PetscScalar quad_value;
1537       temp_constraints++;
1538       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1539       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1540       for (j=0;j<size_of_constraint;j++) {
1541         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1542       }
1543       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1544       total_counts++;
1545     }
1546     for (k=0;k<nnsp_size;k++) {
1547       PetscReal real_value;
1548       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1549       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1550       for (j=0;j<size_of_constraint;j++) {
1551         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1552       }
1553       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1554       /* check if array is null on the connected component */
1555       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1556       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1557       if (real_value > 0.0) { /* keep indices and values */
1558         temp_constraints++;
1559         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1560         total_counts++;
1561       }
1562     }
1563     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1564     valid_constraints = temp_constraints;
1565     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1566     if (!pcbddc->use_nnsp_true && temp_constraints) {
1567       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1568 
1569 #if defined(PETSC_MISSING_LAPACK_GESVD)
1570       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1571          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1572          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1573             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1574             from that computed using LAPACKgesvd
1575          -> This is due to a different computation of eigenvectors in LAPACKheev
1576          -> The quality of the POD-computed basis will be the same */
1577       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1578       /* Store upper triangular part of correlation matrix */
1579       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1580       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1581       for (j=0;j<temp_constraints;j++) {
1582         for (k=0;k<j+1;k++) {
1583           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1584         }
1585       }
1586       /* compute eigenvalues and eigenvectors of correlation matrix */
1587       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1588       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1589 #if !defined(PETSC_USE_COMPLEX)
1590       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1591 #else
1592       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1593 #endif
1594       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1595       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1596       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1597       j=0;
1598       while (j < temp_constraints && singular_vals[j] < tol) j++;
1599       total_counts=total_counts-j;
1600       valid_constraints = temp_constraints-j;
1601       /* scale and copy POD basis into used quadrature memory */
1602       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1603       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1604       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1605       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1606       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1607       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1608       if (j<temp_constraints) {
1609         PetscInt ii;
1610         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1611         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1612         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1613         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1614         for (k=0;k<temp_constraints-j;k++) {
1615           for (ii=0;ii<size_of_constraint;ii++) {
1616             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1617           }
1618         }
1619       }
1620 #else  /* on missing GESVD */
1621       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1622       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1623       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1624       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1625 #if !defined(PETSC_USE_COMPLEX)
1626       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1627 #else
1628       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1629 #endif
1630       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1631       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1632       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1633       k = temp_constraints;
1634       if (k > size_of_constraint) k = size_of_constraint;
1635       j = 0;
1636       while (j < k && singular_vals[k-j-1] < tol) j++;
1637       total_counts = total_counts-temp_constraints+k-j;
1638       valid_constraints = k-j;
1639 #endif /* on missing GESVD */
1640     }
1641     /* setting change_of_basis flag is safe now */
1642     if (boolforchange) {
1643       for (j=0;j<valid_constraints;j++) {
1644         PetscBTSet(change_basis,total_counts-j-1);
1645       }
1646     }
1647   }
1648   /* free index sets of faces, edges and vertices */
1649   for (i=0;i<n_ISForFaces;i++) {
1650     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1651   }
1652   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1653   for (i=0;i<n_ISForEdges;i++) {
1654     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1655   }
1656   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1657   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1658   /* map temp_indices_to_constraint in boundary numbering */
1659   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1660   if (i != temp_indices[total_counts]) {
1661     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1662   }
1663 
1664   /* free workspace */
1665   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1666     ierr = PetscFree(work);CHKERRQ(ierr);
1667 #if defined(PETSC_USE_COMPLEX)
1668     ierr = PetscFree(rwork);CHKERRQ(ierr);
1669 #endif
1670     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1671 #if defined(PETSC_MISSING_LAPACK_GESVD)
1672     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1673     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1674 #endif
1675   }
1676   for (k=0;k<nnsp_size;k++) {
1677     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1678   }
1679   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1680 
1681   /* set quantities in pcbddc data structure and store previous primal size */
1682   /* n_vertices defines the number of subdomain corners in the primal space */
1683   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1684   olocal_primal_size = pcbddc->local_primal_size;
1685   pcbddc->local_primal_size = total_counts;
1686   pcbddc->n_vertices = n_vertices;
1687   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1688 
1689   /* Create constraint matrix */
1690   /* The constraint matrix is used to compute the l2g map of primal dofs */
1691   /* so we need to set it up properly either with or without change of basis */
1692   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1693   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1694   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1695   /* array to compute a local numbering of constraints : vertices first then constraints */
1696   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1697   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1698   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1699   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1700   /* auxiliary stuff for basis change */
1701   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1702   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1703 
1704   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1705   total_primal_vertices=0;
1706   for (i=0;i<pcbddc->local_primal_size;i++) {
1707     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1708     if (size_of_constraint == 1) {
1709       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1710       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1711       aux_primal_minloc[total_primal_vertices]=0;
1712       total_primal_vertices++;
1713     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1714       PetscInt min_loc,min_index;
1715       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1716       /* find first untouched local node */
1717       k = 0;
1718       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1719       min_index = global_indices[k];
1720       min_loc = k;
1721       /* search the minimum among global nodes already untouched on the cc */
1722       for (k=1;k<size_of_constraint;k++) {
1723         /* there can be more than one constraint on a single connected component */
1724         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1725           min_index = global_indices[k];
1726           min_loc = k;
1727         }
1728       }
1729       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1730       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1731       aux_primal_minloc[total_primal_vertices]=min_loc;
1732       total_primal_vertices++;
1733     }
1734   }
1735   /* determine if a QR strategy is needed for change of basis */
1736   qr_needed = PETSC_FALSE;
1737   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1738   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1739     if (PetscBTLookup(change_basis,i)) {
1740       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1741       j = 0;
1742       for (k=0;k<size_of_constraint;k++) {
1743         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1744           j++;
1745         }
1746       }
1747       /* found more than one primal dof on the cc */
1748       if (j > 1) {
1749         PetscBTSet(qr_needed_idx,i);
1750         qr_needed = PETSC_TRUE;
1751       }
1752     }
1753   }
1754   /* free workspace */
1755   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1756 
1757   /* permute indices in order to have a sorted set of vertices */
1758   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1759 
1760   /* nonzero structure of constraint matrix */
1761   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1762   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1763   j=total_primal_vertices;
1764   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1765     if (!PetscBTLookup(change_basis,i)) {
1766       nnz[j]=temp_indices[i+1]-temp_indices[i];
1767       j++;
1768     }
1769   }
1770   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1771   ierr = PetscFree(nnz);CHKERRQ(ierr);
1772   /* set values in constraint matrix */
1773   for (i=0;i<total_primal_vertices;i++) {
1774     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1775   }
1776   total_counts = total_primal_vertices;
1777   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1778     if (!PetscBTLookup(change_basis,i)) {
1779       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1780       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1781       total_counts++;
1782     }
1783   }
1784   /* assembling */
1785   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1786   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1787   /*
1788   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1789   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1790   */
1791   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1792   if (pcbddc->use_change_of_basis) {
1793     /* dual and primal dofs on a single cc */
1794     PetscInt     dual_dofs,primal_dofs;
1795     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1796     PetscInt     primal_counter;
1797     /* working stuff for GEQRF */
1798     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1799     PetscBLASInt lqr_work;
1800     /* working stuff for UNGQR */
1801     PetscScalar  *gqr_work,lgqr_work_t;
1802     PetscBLASInt lgqr_work;
1803     /* working stuff for TRTRS */
1804     PetscScalar  *trs_rhs;
1805     PetscBLASInt Blas_NRHS;
1806     /* pointers for values insertion into change of basis matrix */
1807     PetscInt     *start_rows,*start_cols;
1808     PetscScalar  *start_vals;
1809     /* working stuff for values insertion */
1810     PetscBT      is_primal;
1811 
1812     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1813     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1814     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1815     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1816     /* work arrays */
1817     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1818     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1819     /* nonzeros per row */
1820     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1821       if (PetscBTLookup(change_basis,i)) {
1822         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1823         if (PetscBTLookup(qr_needed_idx,i)) {
1824           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1825         } else {
1826           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1827           /* get local primal index on the cc */
1828           j = 0;
1829           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1830           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1831         }
1832       }
1833     }
1834     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1835     ierr = PetscFree(nnz);CHKERRQ(ierr);
1836     /* Set initial identity in the matrix */
1837     for (i=0;i<pcis->n_B;i++) {
1838       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1839     }
1840 
1841     if (pcbddc->dbg_flag) {
1842       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1843       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1844     }
1845 
1846 
1847     /* Now we loop on the constraints which need a change of basis */
1848     /*
1849        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1850        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1851 
1852        Basic blocks of change of basis matrix T computed
1853 
1854           - Using the following block transformation if there is only a primal dof on the cc
1855             (in the example, primal dof is the last one of the edge in LOCAL ordering
1856              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1857             | 1        0   ...        0     1 |
1858             | 0        1   ...        0     1 |
1859             |              ...                |
1860             | 0        ...            1     1 |
1861             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1862 
1863           - via QR decomposition of constraints otherwise
1864     */
1865     if (qr_needed) {
1866       /* space to store Q */
1867       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1868       /* first we issue queries for optimal work */
1869       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1870       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1871       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1872       lqr_work = -1;
1873       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1874       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1875       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1876       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1877       lgqr_work = -1;
1878       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1879       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1880       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1881       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1882       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1883       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1884       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1885       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1886       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1887       /* array to store scaling factors for reflectors */
1888       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1889       /* array to store rhs and solution of triangular solver */
1890       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1891       /* allocating workspace for check */
1892       if (pcbddc->dbg_flag) {
1893         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1894       }
1895     }
1896     /* array to store whether a node is primal or not */
1897     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1898     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1899     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1900     if (i != total_primal_vertices) {
1901       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1902     }
1903     for (i=0;i<total_primal_vertices;i++) {
1904       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1905     }
1906     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1907 
1908     /* loop on constraints and see whether or not they need a change of basis and compute it */
1909     /* -> using implicit ordering contained in temp_indices data */
1910     total_counts = pcbddc->n_vertices;
1911     primal_counter = total_counts;
1912     while (total_counts<pcbddc->local_primal_size) {
1913       primal_dofs = 1;
1914       if (PetscBTLookup(change_basis,total_counts)) {
1915         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1916         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1917           primal_dofs++;
1918         }
1919         /* get constraint info */
1920         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1921         dual_dofs = size_of_constraint-primal_dofs;
1922 
1923         if (pcbddc->dbg_flag) {
1924           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1925         }
1926 
1927         if (primal_dofs > 1) { /* QR */
1928 
1929           /* copy quadrature constraints for change of basis check */
1930           if (pcbddc->dbg_flag) {
1931             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1932           }
1933           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1934           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1935 
1936           /* compute QR decomposition of constraints */
1937           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1938           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1939           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1940           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1941           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1942           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1943           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1944 
1945           /* explictly compute R^-T */
1946           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1947           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1948           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1949           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1950           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1951           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1952           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1953           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1954           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1955           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1956 
1957           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1958           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1959           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1960           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1961           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1962           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1963           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1964           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1965           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1966 
1967           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1968              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1969              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1970           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1971           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1972           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1973           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1974           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1975           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1976           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1977           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1978           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1979           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1980 
1981           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1982           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1983           /* insert cols for primal dofs */
1984           for (j=0;j<primal_dofs;j++) {
1985             start_vals = &qr_basis[j*size_of_constraint];
1986             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1987             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1988           }
1989           /* insert cols for dual dofs */
1990           for (j=0,k=0;j<dual_dofs;k++) {
1991             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1992               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
1993               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
1994               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1995               j++;
1996             }
1997           }
1998 
1999           /* check change of basis */
2000           if (pcbddc->dbg_flag) {
2001             PetscInt   ii,jj;
2002             PetscBool valid_qr=PETSC_TRUE;
2003             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2004             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2005             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2006             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2007             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2008             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2009             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2010             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2011             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2012             for (jj=0;jj<size_of_constraint;jj++) {
2013               for (ii=0;ii<primal_dofs;ii++) {
2014                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2015                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2016               }
2017             }
2018             if (!valid_qr) {
2019               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2020               for (jj=0;jj<size_of_constraint;jj++) {
2021                 for (ii=0;ii<primal_dofs;ii++) {
2022                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2023                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2024                   }
2025                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2026                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2027                   }
2028                 }
2029               }
2030             } else {
2031               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2032             }
2033           }
2034         } else { /* simple transformation block */
2035           PetscInt row,col;
2036           PetscScalar val;
2037           for (j=0;j<size_of_constraint;j++) {
2038             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2039             if (!PetscBTLookup(is_primal,row)) {
2040               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2041               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2042               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2043             } else {
2044               for (k=0;k<size_of_constraint;k++) {
2045                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2046                 if (row != col) {
2047                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2048                 } else {
2049                   val = 1.0;
2050                 }
2051                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2052               }
2053             }
2054           }
2055         }
2056         /* increment primal counter */
2057         primal_counter += primal_dofs;
2058       } else {
2059         if (pcbddc->dbg_flag) {
2060           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2061         }
2062       }
2063       /* increment constraint counter total_counts */
2064       total_counts += primal_dofs;
2065     }
2066 
2067     /* free workspace */
2068     if (qr_needed) {
2069       if (pcbddc->dbg_flag) {
2070         ierr = PetscFree(work);CHKERRQ(ierr);
2071       }
2072       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2073       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2074       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2075       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2076       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2077     }
2078     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2079     /* assembling */
2080     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082     /*
2083     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2084     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2085     */
2086   }
2087 
2088   /* get indices in local ordering for vertices and constraints */
2089   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2090     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2091     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2092   }
2093   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2094   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2095   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2096   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2097   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2098   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2099   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2100   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2101   /* set quantities in PCBDDC data struct */
2102   pcbddc->n_actual_vertices = i;
2103   /* check if a new primal space has been introduced */
2104   pcbddc->new_primal_space_local = PETSC_TRUE;
2105   if (olocal_primal_size == pcbddc->local_primal_size) {
2106     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2107     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2108     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2109   }
2110   /* need to set pcbddc->new_primal_space_local to true when the local primal space is empty, otherwise pcbddc->is_R_local will not be created */
2111   if (!pcbddc->local_primal_size) pcbddc->new_primal_space_local = PETSC_TRUE;
2112   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2113   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2114 
2115   /* flush dbg viewer */
2116   if (pcbddc->dbg_flag) {
2117     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2118   }
2119 
2120   /* free workspace */
2121   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2122   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2123   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2124   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2125   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2126   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2127   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2128   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #undef __FUNCT__
2133 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2134 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2135 {
2136   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2137   PC_IS       *pcis = (PC_IS*)pc->data;
2138   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2139   PetscInt    ierr,i,vertex_size;
2140   PetscViewer viewer=pcbddc->dbg_viewer;
2141 
2142   PetscFunctionBegin;
2143   /* Reset previously computed graph */
2144   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2145   /* Init local Graph struct */
2146   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2147 
2148   /* Check validity of the csr graph passed in by the user */
2149   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2150     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2151   }
2152 
2153   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2154   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2155     Mat mat_adj;
2156     const PetscInt *xadj,*adjncy;
2157     PetscBool flg_row=PETSC_TRUE;
2158 
2159     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2160     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2161     if (!flg_row) {
2162       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2163     }
2164     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2165     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2166     if (!flg_row) {
2167       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2168     }
2169     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2170   }
2171 
2172   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2173   vertex_size = 1;
2174   if (pcbddc->user_provided_isfordofs) {
2175     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2176       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2177       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2178         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2179         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2180       }
2181       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2182       pcbddc->n_ISForDofs = 0;
2183       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2184     }
2185     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2186     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2187   } else {
2188     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2189       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2190       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2191       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2192         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2193       }
2194     }
2195   }
2196 
2197   /* Setup of Graph */
2198   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2199     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2200   }
2201   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2202     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2203   }
2204   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2205 
2206   /* Graph's connected components analysis */
2207   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2208 
2209   /* print some info to stdout */
2210   if (pcbddc->dbg_flag) {
2211     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2212   }
2213 
2214   /* mark topography has done */
2215   pcbddc->recompute_topography = PETSC_FALSE;
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2221 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2222 {
2223   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2224   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   n = 0;
2229   vertices = 0;
2230   if (pcbddc->ConstraintMatrix) {
2231     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2232     for (i=0;i<local_primal_size;i++) {
2233       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2234       if (size_of_constraint == 1) n++;
2235       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2236     }
2237     if (vertices_idx) {
2238       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2239       n = 0;
2240       for (i=0;i<local_primal_size;i++) {
2241         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2242         if (size_of_constraint == 1) {
2243           vertices[n++]=row_cmat_indices[0];
2244         }
2245         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2246       }
2247     }
2248   }
2249   *n_vertices = n;
2250   if (vertices_idx) *vertices_idx = vertices;
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2256 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2257 {
2258   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2259   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2260   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2261   PetscBT        touched;
2262   PetscErrorCode ierr;
2263 
2264     /* This function assumes that the number of local constraints per connected component
2265        is not greater than the number of nodes defined for the connected component
2266        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2267   PetscFunctionBegin;
2268   n = 0;
2269   constraints_index = 0;
2270   if (pcbddc->ConstraintMatrix) {
2271     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2272     max_size_of_constraint = 0;
2273     for (i=0;i<local_primal_size;i++) {
2274       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2275       if (size_of_constraint > 1) {
2276         n++;
2277       }
2278       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2279       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2280     }
2281     if (constraints_idx) {
2282       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2283       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2284       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2285       n = 0;
2286       for (i=0;i<local_primal_size;i++) {
2287         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2288         if (size_of_constraint > 1) {
2289           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2290           /* find first untouched local node */
2291           j = 0;
2292           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2293           min_index = row_cmat_global_indices[j];
2294           min_loc = j;
2295           /* search the minimum among nodes not yet touched on the connected component
2296              since there can be more than one constraint on a single cc */
2297           for (j=1;j<size_of_constraint;j++) {
2298             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2299               min_index = row_cmat_global_indices[j];
2300               min_loc = j;
2301             }
2302           }
2303           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2304           constraints_index[n++] = row_cmat_indices[min_loc];
2305         }
2306         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2307       }
2308       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2309       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2310     }
2311   }
2312   *n_constraints = n;
2313   if (constraints_idx) *constraints_idx = constraints_index;
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 /* the next two functions has been adapted from pcis.c */
2318 #undef __FUNCT__
2319 #define __FUNCT__ "PCBDDCApplySchur"
2320 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2321 {
2322   PetscErrorCode ierr;
2323   PC_IS          *pcis = (PC_IS*)(pc->data);
2324 
2325   PetscFunctionBegin;
2326   if (!vec2_B) { vec2_B = v; }
2327   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2328   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2329   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2330   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2331   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2337 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2338 {
2339   PetscErrorCode ierr;
2340   PC_IS          *pcis = (PC_IS*)(pc->data);
2341 
2342   PetscFunctionBegin;
2343   if (!vec2_B) { vec2_B = v; }
2344   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2345   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2346   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2347   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2348   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 
2352 #undef __FUNCT__
2353 #define __FUNCT__ "PCBDDCSubsetNumbering"
2354 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2355 {
2356   Vec            local_vec,global_vec;
2357   IS             seqis,paris;
2358   VecScatter     scatter_ctx;
2359   PetscScalar    *array;
2360   PetscInt       *temp_global_dofs;
2361   PetscScalar    globalsum;
2362   PetscInt       i,j,s;
2363   PetscInt       nlocals,first_index,old_index,max_local;
2364   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2365   PetscMPIInt    *dof_sizes,*dof_displs;
2366   PetscBool      first_found;
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   /* mpi buffers */
2371   MPI_Comm_size(comm,&size_prec_comm);
2372   MPI_Comm_rank(comm,&rank_prec_comm);
2373   j = ( !rank_prec_comm ? size_prec_comm : 0);
2374   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2375   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2376   /* get maximum size of subset */
2377   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2378   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2379   max_local = 0;
2380   if (n_local_dofs) {
2381     max_local = temp_global_dofs[0];
2382     for (i=1;i<n_local_dofs;i++) {
2383       if (max_local < temp_global_dofs[i] ) {
2384         max_local = temp_global_dofs[i];
2385       }
2386     }
2387   }
2388   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2389   max_global++;
2390   max_local = 0;
2391   if (n_local_dofs) {
2392     max_local = local_dofs[0];
2393     for (i=1;i<n_local_dofs;i++) {
2394       if (max_local < local_dofs[i] ) {
2395         max_local = local_dofs[i];
2396       }
2397     }
2398   }
2399   max_local++;
2400   /* allocate workspace */
2401   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2402   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2403   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2404   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2405   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2406   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2407   /* create scatter */
2408   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2409   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2410   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2411   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2412   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2413   /* init array */
2414   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2415   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2416   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2417   if (local_dofs_mult) {
2418     for (i=0;i<n_local_dofs;i++) {
2419       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2420     }
2421   } else {
2422     for (i=0;i<n_local_dofs;i++) {
2423       array[local_dofs[i]]=1.0;
2424     }
2425   }
2426   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2427   /* scatter into global vec and get total number of global dofs */
2428   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2431   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2432   /* Fill global_vec with cumulative function for global numbering */
2433   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2434   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2435   nlocals = 0;
2436   first_index = -1;
2437   first_found = PETSC_FALSE;
2438   for (i=0;i<s;i++) {
2439     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2440       first_found = PETSC_TRUE;
2441       first_index = i;
2442     }
2443     nlocals += (PetscInt)PetscRealPart(array[i]);
2444   }
2445   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2446   if (!rank_prec_comm) {
2447     dof_displs[0]=0;
2448     for (i=1;i<size_prec_comm;i++) {
2449       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2450     }
2451   }
2452   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2453   if (first_found) {
2454     array[first_index] += (PetscScalar)nlocals;
2455     old_index = first_index;
2456     for (i=first_index+1;i<s;i++) {
2457       if (PetscRealPart(array[i]) > 0.0) {
2458         array[i] += array[old_index];
2459         old_index = i;
2460       }
2461     }
2462   }
2463   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2464   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2465   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2466   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2467   /* get global ordering of local dofs */
2468   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2469   if (local_dofs_mult) {
2470     for (i=0;i<n_local_dofs;i++) {
2471       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2472     }
2473   } else {
2474     for (i=0;i<n_local_dofs;i++) {
2475       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2476     }
2477   }
2478   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2479   /* free workspace */
2480   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2481   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2482   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2483   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2484   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2485   /* return pointer to global ordering of local dofs */
2486   *global_numbering_subset = temp_global_dofs;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2492 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2493 {
2494   PetscInt       i,j;
2495   PetscScalar    *alphas;
2496   PetscErrorCode ierr;
2497 
2498   PetscFunctionBegin;
2499   /* this implements stabilized Gram-Schmidt */
2500   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2501   for (i=0;i<n;i++) {
2502     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2503     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2504     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2505   }
2506   ierr = PetscFree(alphas);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 /* TODO
2511    - now preallocation is done assuming SEQDENSE local matrices
2512 */
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatISGetMPIXAIJ"
2515 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2516 {
2517   Mat                    new_mat;
2518   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2519   /* info on mat */
2520   /* ISLocalToGlobalMapping rmapping,cmapping; */
2521   PetscInt               bs,rows,cols;
2522   PetscInt               lrows,lcols;
2523   PetscInt               local_rows,local_cols;
2524   PetscBool              isdense;
2525   /* values insertion */
2526   PetscScalar            *array;
2527   PetscInt               *local_indices,*global_indices;
2528   /* work */
2529   PetscInt               i,j,index_row;
2530   PetscErrorCode         ierr;
2531 
2532   PetscFunctionBegin;
2533   /* MISSING CHECKS
2534     - rectangular case not covered (it is not allowed by MATIS)
2535   */
2536   /* get info from mat */
2537   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2538   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2539   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2540   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2541 
2542   /* work */
2543   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2544   for (i=0;i<local_rows;i++) local_indices[i]=i;
2545   /* map indices of local mat to global values */
2546   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2547   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2548   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2549 
2550   if (reuse==MAT_INITIAL_MATRIX) {
2551     Vec         vec_dnz,vec_onz;
2552     PetscScalar *my_dnz,*my_onz;
2553     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2554     PetscInt    index_col,owner;
2555     PetscMPIInt nsubdomains;
2556 
2557     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2558     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2559     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2560     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2561     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2562     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2563     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2564 
2565     /*
2566       preallocation
2567     */
2568 
2569     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2570     /*
2571        Some vectors are needed to sum up properly on shared interface dofs.
2572        Preallocation macros cannot do the job.
2573        Note that preallocation is not exact, since it overestimates nonzeros
2574     */
2575     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2576     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2577     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2578     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2579     /* All processes need to compute entire row ownership */
2580     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2581     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2582     for (i=0;i<nsubdomains;i++) {
2583       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2584         row_ownership[j]=i;
2585       }
2586     }
2587 
2588     /*
2589        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2590        then, they will be summed up properly. This way, preallocation is always sufficient
2591     */
2592     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2593     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2594     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2595     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2596     for (i=0;i<local_rows;i++) {
2597       index_row = global_indices[i];
2598       for (j=i;j<local_rows;j++) {
2599         owner = row_ownership[index_row];
2600         index_col = global_indices[j];
2601         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2602           my_dnz[i] += 1.0;
2603         } else { /* offdiag block */
2604           my_onz[i] += 1.0;
2605         }
2606         /* same as before, interchanging rows and cols */
2607         if (i != j) {
2608           owner = row_ownership[index_col];
2609           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2610             my_dnz[j] += 1.0;
2611           } else {
2612             my_onz[j] += 1.0;
2613           }
2614         }
2615       }
2616     }
2617     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2618     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2619     if (local_rows) { /* multilevel guard */
2620       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2621       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2622     }
2623     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2624     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2625     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2626     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2627     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2628     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2629     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2630 
2631     /* set computed preallocation in dnz and onz */
2632     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2633     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2634     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2635     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2636     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2637     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2638     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2639     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2640 
2641     /* Resize preallocation if overestimated */
2642     for (i=0;i<lrows;i++) {
2643       dnz[i] = PetscMin(dnz[i],lcols);
2644       onz[i] = PetscMin(onz[i],cols-lcols);
2645     }
2646     /* set preallocation */
2647     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2648     for (i=0;i<lrows/bs;i++) {
2649       dnz[i] = dnz[i*bs]/bs;
2650       onz[i] = onz[i*bs]/bs;
2651     }
2652     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2653     for (i=0;i<lrows/bs;i++) {
2654       dnz[i] = dnz[i]-i;
2655     }
2656     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2657     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2658     *M = new_mat;
2659   } else {
2660     PetscInt mbs,mrows,mcols;
2661     /* some checks */
2662     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2663     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2664     if (mrows != rows) {
2665       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2666     }
2667     if (mrows != rows) {
2668       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2669     }
2670     if (mbs != bs) {
2671       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2672     }
2673     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2674   }
2675   /* set local to global mappings */
2676   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2677   /* Set values */
2678   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2679   if (isdense) { /* special case for dense local matrices */
2680     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2681     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2682     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2683     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2684     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2685     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2686   } else { /* very basic values insertion for all other matrix types */
2687     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2688     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2689     for (i=0;i<local_rows;i++) {
2690       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2691       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2692       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2693       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2694       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2695       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2696     }
2697   }
2698   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2700   if (isdense) {
2701     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatISGetSubassemblingPattern"
2708 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2709 {
2710   Mat             subdomain_adj;
2711   IS              new_ranks,ranks_send_to;
2712   MatPartitioning partitioner;
2713   Mat_IS          *matis;
2714   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2715   PetscInt        prank;
2716   PetscMPIInt     size,rank,color;
2717   PetscInt        *xadj,*adjncy,*oldranks;
2718   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2719   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2720   PetscErrorCode  ierr;
2721   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2722   PetscSubcomm    subcomm;
2723 
2724   PetscFunctionBegin;
2725   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2726   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2727   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2728 
2729   /* Get info on mapping */
2730   matis = (Mat_IS*)(mat->data);
2731   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2732   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2733 
2734   /* build local CSR graph of subdomains' connectivity */
2735   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2736   xadj[0] = 0;
2737   xadj[1] = PetscMax(n_neighs-1,0);
2738   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2739   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2740 
2741   if (threshold) {
2742     PetscInt* count,min_threshold;
2743     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2744     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2745     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2746       for (j=0;j<n_shared[i];j++) {
2747         count[shared[i][j]] += 1;
2748       }
2749     }
2750     /* adapt threshold since we dont want to lose significant connections */
2751     min_threshold = n_neighs;
2752     for (i=1;i<n_neighs;i++) {
2753       for (j=0;j<n_shared[i];j++) {
2754         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2755       }
2756     }
2757     threshold = PetscMax(min_threshold+1,threshold);
2758     xadj[1] = 0;
2759     for (i=1;i<n_neighs;i++) {
2760       for (j=0;j<n_shared[i];j++) {
2761         if (count[shared[i][j]] < threshold) {
2762           adjncy[xadj[1]] = neighs[i];
2763           adjncy_wgt[xadj[1]] = n_shared[i];
2764           xadj[1]++;
2765           break;
2766         }
2767       }
2768     }
2769     ierr = PetscFree(count);CHKERRQ(ierr);
2770   } else {
2771     if (xadj[1]) {
2772       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2773       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2774     }
2775   }
2776   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2777   if (use_square) {
2778     for (i=0;i<xadj[1];i++) {
2779       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2780     }
2781   }
2782   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2783 
2784   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2785 
2786   /*
2787     Restrict work on active processes only.
2788   */
2789   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2790   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2791   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2792   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2793   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2794   if (color) {
2795     ierr = PetscFree(xadj);CHKERRQ(ierr);
2796     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2797     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2798   } else {
2799     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2800     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2801     prank = rank;
2802     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2803     /*
2804     for (i=0;i<size;i++) {
2805       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2806     }
2807     */
2808     for (i=0;i<xadj[1];i++) {
2809       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2810     }
2811     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2812     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2813     n_subdomains = (PetscInt)size;
2814     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2815 
2816     /* Partition */
2817     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2818     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2819     if (use_vwgt) {
2820       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2821       v_wgt[0] = local_size;
2822       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2823     }
2824     /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */
2825     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2826     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2827     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2828     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2829 
2830     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2831     /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */
2832     ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2833     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2834     /* clean up */
2835     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2836     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2837     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2838     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2839   }
2840   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2841 
2842   /* assemble parallel IS for sends */
2843   i = 1;
2844   if (color) i=0;
2845   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2846 
2847   /* get back IS */
2848   *is_sends = ranks_send_to;
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2853 
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatISSubassemble"
2856 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n)
2857 {
2858   Mat                    local_mat;
2859   Mat_IS                 *matis;
2860   IS                     is_sends_internal;
2861   PetscInt               rows,cols;
2862   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2863   PetscBool              ismatis,isdense;
2864   ISLocalToGlobalMapping l2gmap;
2865   PetscInt*              l2gmap_indices;
2866   const PetscInt*        is_indices;
2867   MatType                new_local_type;
2868   MatTypePrivate         new_local_type_private;
2869   /* buffers */
2870   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2871   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2872   /* MPI */
2873   MPI_Comm               comm;
2874   PetscMPIInt            n_sends,n_recvs,commsize;
2875   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2876   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2877   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2878   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2879   PetscErrorCode         ierr;
2880 
2881   PetscFunctionBegin;
2882   /* checks */
2883   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2884   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2885   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2886   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2887   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2888   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2889   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2890   if (reuse == MAT_REUSE_MATRIX) {
2891     PetscInt mrows,mcols,mnrows,mncols;
2892     PetscBool ismatis;
2893     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2894     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2895     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2896     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2897     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2898     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2899   }
2900   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2901   PetscValidLogicalCollectiveInt(mat,bs,0);
2902   /* prepare IS for sending if not provided */
2903   if (!is_sends) {
2904     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2905     ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2906   } else {
2907     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2908     is_sends_internal = is_sends;
2909   }
2910 
2911   /* get pointer of MATIS data */
2912   matis = (Mat_IS*)mat->data;
2913 
2914   /* get comm */
2915   comm = PetscObjectComm((PetscObject)mat);
2916 
2917   /* compute number of sends */
2918   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2919   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2920 
2921   /* compute number of receives */
2922   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2923   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2924   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2925   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2926   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2927   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2928   ierr = PetscFree(iflags);CHKERRQ(ierr);
2929 
2930   /* prepare send/receive buffers */
2931   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2932   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2933   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2934   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2935 
2936   /* Get data from local mat */
2937   if (!isdense) {
2938     /* TODO: See below some guidelines on how to prepare the local buffers */
2939     /*
2940        send_buffer_vals should contain the raw values of the local matrix
2941        send_buffer_idxs should contain:
2942        - MatType_PRIVATE type
2943        - PetscInt        size_of_l2gmap
2944        - PetscInt        global_row_indices[size_of_l2gmap]
2945        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2946     */
2947   } else {
2948     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2949     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2950     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2951     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2952     send_buffer_idxs[1] = i;
2953     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2954     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2955     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2956     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2957     for (i=0;i<n_sends;i++) {
2958       ilengths_vals[is_indices[i]] = len*len;
2959       ilengths_idxs[is_indices[i]] = len+2;
2960     }
2961   }
2962   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2963   buf_size_idxs = 0;
2964   buf_size_vals = 0;
2965   for (i=0;i<n_recvs;i++) {
2966     buf_size_idxs += (PetscInt)olengths_idxs[i];
2967     buf_size_vals += (PetscInt)olengths_vals[i];
2968   }
2969   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2970   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2971 
2972   /* get new tags for clean communications */
2973   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2974   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2975 
2976   /* allocate for requests */
2977   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2978   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2979   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2980   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2981 
2982   /* communications */
2983   ptr_idxs = recv_buffer_idxs;
2984   ptr_vals = recv_buffer_vals;
2985   for (i=0;i<n_recvs;i++) {
2986     source_dest = onodes[i];
2987     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2988     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2989     ptr_idxs += olengths_idxs[i];
2990     ptr_vals += olengths_vals[i];
2991   }
2992   for (i=0;i<n_sends;i++) {
2993     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2994     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
2995     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
2996   }
2997   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2998   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
2999 
3000   /* assemble new l2g map */
3001   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3002   ptr_idxs = recv_buffer_idxs;
3003   buf_size_idxs = 0;
3004   for (i=0;i<n_recvs;i++) {
3005     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3006     ptr_idxs += olengths_idxs[i];
3007   }
3008   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3009   ptr_idxs = recv_buffer_idxs;
3010   buf_size_idxs = 0;
3011   for (i=0;i<n_recvs;i++) {
3012     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3013     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3014     ptr_idxs += olengths_idxs[i];
3015   }
3016   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3017   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3018   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3019 
3020   /* infer new local matrix type from received local matrices type */
3021   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3022   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3023   new_local_type_private = MATAIJ_PRIVATE;
3024   new_local_type = MATSEQAIJ;
3025   if (n_recvs) {
3026     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3027     ptr_idxs = recv_buffer_idxs;
3028     for (i=0;i<n_recvs;i++) {
3029       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3030         new_local_type_private = MATAIJ_PRIVATE;
3031         break;
3032       }
3033       ptr_idxs += olengths_idxs[i];
3034     }
3035     switch (new_local_type_private) {
3036       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3037         new_local_type = MATSEQAIJ;
3038         bs = 1;
3039         break;
3040       case MATAIJ_PRIVATE:
3041         new_local_type = MATSEQAIJ;
3042         bs = 1;
3043         break;
3044       case MATBAIJ_PRIVATE:
3045         new_local_type = MATSEQBAIJ;
3046         break;
3047       case MATSBAIJ_PRIVATE:
3048         new_local_type = MATSEQSBAIJ;
3049         break;
3050       default:
3051         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3052         break;
3053     }
3054   }
3055 
3056   /* create MATIS object if needed */
3057   if (reuse == MAT_INITIAL_MATRIX) {
3058     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3059     ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3060   } else {
3061     /* it also destroys the local matrices */
3062     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3063   }
3064   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3065   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3066   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3067   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3068 
3069   /* set values */
3070   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3071   ptr_vals = recv_buffer_vals;
3072   ptr_idxs = recv_buffer_idxs;
3073   for (i=0;i<n_recvs;i++) {
3074     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3075       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3076       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3077       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3078       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3079       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3080     }
3081     ptr_idxs += olengths_idxs[i];
3082     ptr_vals += olengths_vals[i];
3083   }
3084   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3085   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3086   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3087   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3088 
3089   { /* check */
3090     Vec       lvec,rvec;
3091     PetscReal infty_error;
3092 
3093     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3094     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3095     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3096     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3097     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3098     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3099     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3100     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3101     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3102   }
3103 
3104   /* free workspace */
3105   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3106   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3107   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3108   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3109   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3110   if (isdense) {
3111     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3112     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3113   } else {
3114     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3115   }
3116   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3117   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3118   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3119   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3120   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3121   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3122   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3123   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3124   ierr = PetscFree(onodes);CHKERRQ(ierr);
3125   PetscFunctionReturn(0);
3126 }
3127 
3128 #undef __FUNCT__
3129 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3130 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3131 {
3132   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3133   PC_IS                  *pcis = (PC_IS*)pc->data;
3134   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3135   MatNullSpace           CoarseNullSpace=NULL;
3136   ISLocalToGlobalMapping coarse_islg;
3137   IS                     coarse_is;
3138   PetscInt               max_it;
3139   PetscInt               im_active=-1,active_procs=-1;
3140   PC                     pc_temp;
3141   PCType                 coarse_pc_type;
3142   KSPType                coarse_ksp_type;
3143   PetscBool              multilevel_requested,multilevel_allowed;
3144   PetscBool              setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse;
3145   MatStructure           matstruct;
3146   PetscErrorCode         ierr;
3147 
3148   PetscFunctionBegin;
3149   /* Assign global numbering to coarse dofs */
3150   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3151     PetscInt ocoarse_size;
3152     ocoarse_size = pcbddc->coarse_size;
3153     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3154     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3155     /* see if we can avoid some work */
3156     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3157       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3158         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3159         coarse_reuse = PETSC_FALSE;
3160       } else { /* we can safely reuse already computed coarse matrix */
3161         coarse_reuse = PETSC_TRUE;
3162       }
3163     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3164       coarse_reuse = PETSC_FALSE;
3165     }
3166     /* reset any subassembling information */
3167     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3168   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3169     coarse_reuse = PETSC_TRUE;
3170   }
3171 
3172   /* test if we can go multilevel */
3173   multilevel_allowed = PETSC_FALSE;
3174   multilevel_requested = PETSC_FALSE;
3175   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3176   if (multilevel_requested) {
3177     /* count "active processes" */
3178     im_active = !!(pcis->n);
3179     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3180     if (active_procs/pcbddc->coarsening_ratio < 2) {
3181       multilevel_allowed = PETSC_FALSE;
3182     } else {
3183       multilevel_allowed = PETSC_TRUE;
3184     }
3185   }
3186 
3187   /* set defaults for coarse KSP and PC */
3188   if (multilevel_allowed) {
3189     coarse_ksp_type = KSPRICHARDSON;
3190     coarse_pc_type = PCBDDC;
3191   } else {
3192     coarse_ksp_type = KSPPREONLY;
3193     coarse_pc_type = PCREDUNDANT;
3194   }
3195 
3196   /* create the coarse KSP object only once with defaults */
3197   if (!pcbddc->coarse_ksp) {
3198     char prefix[256],str_level[3];
3199     size_t len;
3200     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3201     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3202     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3203     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3204     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3205     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3206     /* prefix */
3207     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3208     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3209     if (!pcbddc->current_level) {
3210       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3211       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3212     } else {
3213       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3214       if (pcbddc->current_level>1) len -= 2;
3215       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3216       *(prefix+len)='\0';
3217       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3218       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3219     }
3220     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3221   }
3222   /* allow user customization */
3223   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3224   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3225   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3226 
3227   /* get some info after set from options */
3228   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3229   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3230   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3231   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3232     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3233     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3234     isbddc = PETSC_FALSE;
3235   }
3236 
3237   /* creates temporary l2gmap and IS for coarse indexes */
3238   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3239   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3240 
3241   /* propagate BDDC info to the next level */
3242   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3243   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3244   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3245   if (isbddc) { /* protects from unneded computations */
3246     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3247     const PetscInt         *idxs;
3248     IS                     *c_isfordofs,c_isneumann;
3249     ISLocalToGlobalMapping tmap;
3250 
3251     /* create map between primal indices (in local representative ordering) and local subdomain numbering */
3252     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3253     /* allocate space for temporary storage */
3254     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3255     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3256     /* dofs splitting */
3257     ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr);
3258     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3259       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3260       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3261       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3262       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3263       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3264       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3265       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr);
3266       /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */
3267     }
3268     ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr);
3269     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3270       ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr);
3271     }
3272     ierr = PetscFree(c_isfordofs);CHKERRQ(ierr);
3273     /* neumann boundaries */
3274     if (pcbddc->NeumannBoundariesLocal) {
3275       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3276       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3277       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3278       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3279       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3280       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3281       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr);
3282       /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */
3283       ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr);
3284     }
3285     ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr);
3286     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3287     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3288     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3289   }
3290 
3291   /* creates temporary MATIS object for coarse matrix */
3292   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3293   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3294   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3295   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3296   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3297   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3298   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3299 
3300   /* assemble coarse matrix */
3301   if (isbddc || isnn) {
3302     if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3303       ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3304       if (pcbddc->dbg_flag) {
3305         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3306         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3307         ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr);
3308       }
3309     }
3310     if (coarse_reuse) {
3311       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3312       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3313       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3314     } else {
3315       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3316     }
3317   } else {
3318     if (coarse_reuse) {
3319       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3320       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3321       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3322     } else {
3323       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3324     }
3325   }
3326   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3327 
3328   /* create local to global scatters for coarse problem */
3329   if (pcbddc->new_primal_space) {
3330     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3331     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3332     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3333     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3334     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3335   }
3336   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3337 
3338   /* propagate symmetry info to coarse matrix */
3339   issym = PETSC_FALSE;
3340   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3341   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3342   isherm = PETSC_FALSE;
3343   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3344   ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3345 
3346   /* Compute coarse null space (special handling by BDDC only) */
3347   if (pcbddc->NullSpace) {
3348     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3349     if (isbddc) {
3350       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3351     } else {
3352       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3353     }
3354   }
3355 
3356   /* set operators */
3357   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3358   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3359 
3360   /* additional KSP customization */
3361   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3362   if (max_it < 5) {
3363     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3364   }
3365 
3366   /* print some info if requested */
3367   if (pcbddc->dbg_flag) {
3368     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3369     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3370     if (!multilevel_allowed) {
3371       if (multilevel_requested) {
3372         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3373       } else if (pcbddc->max_levels) {
3374         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3375       }
3376     }
3377     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3378     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3379   }
3380 
3381   /* setup coarse ksp */
3382   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3383 
3384   /* Check coarse problem if in debug mode or if solving with an iterative method */
3385   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3386   if (pcbddc->dbg_flag || !ispreonly) {
3387     KSP       check_ksp;
3388     KSPType   check_ksp_type;
3389     PC        check_pc;
3390     Vec       check_vec;
3391     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3392     PetscInt  its;
3393     PetscBool compute;
3394 
3395     /* Create ksp object suitable for estimation of extreme eigenvalues */
3396     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3397     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3398     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3399     if (ispreonly) {
3400       check_ksp_type = KSPPREONLY;
3401       compute = PETSC_FALSE;
3402     } else {
3403       check_ksp_type = KSPGMRES;
3404       compute = PETSC_TRUE;
3405     }
3406     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3407     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3408     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3409     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3410     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3411     /* create random vec */
3412     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3413     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3414     if (CoarseNullSpace) {
3415       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3416     }
3417     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3418     /* solve coarse problem */
3419     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3420     if (CoarseNullSpace) {
3421       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3422     }
3423     /* set eigenvalue estimation if preonly has not been requested */
3424     if (!ispreonly) {
3425       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3426       /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */
3427       ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3428       ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3429     }
3430     /* check coarse problem residual error */
3431     if (pcbddc->dbg_flag) {
3432       ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3433       ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3434       ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3435       ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3436       ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3437       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3438       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3439       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3440       if (!ispreonly) {
3441         ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3442         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3443       }
3444       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3445     }
3446     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3447   }
3448 
3449   /* print additional info */
3450   if (pcbddc->dbg_flag) {
3451     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3452     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3453     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3454     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3455   }
3456 
3457   /* free memory */
3458   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3459   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3460   PetscFunctionReturn(0);
3461 }
3462 
3463 #undef __FUNCT__
3464 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3465 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3466 {
3467   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3468   PC_IS*         pcis = (PC_IS*)pc->data;
3469   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3470   PetscInt       i,coarse_size;
3471   PetscInt       *local_primal_indices;
3472   PetscErrorCode ierr;
3473 
3474   PetscFunctionBegin;
3475   /* Compute global number of coarse dofs */
3476   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3477     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3478   }
3479   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3480 
3481   /* check numbering */
3482   if (pcbddc->dbg_flag) {
3483     PetscScalar coarsesum,*array;
3484 
3485     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3486     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3487     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3488     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3489     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3490     for (i=0;i<pcbddc->local_primal_size;i++) {
3491       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3492     }
3493     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3494     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3495     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3496     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3497     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3498     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3499     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3500     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3501     for (i=0;i<pcis->n;i++) {
3502       if (array[i] == 1.0) {
3503         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3504       }
3505     }
3506     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3507     for (i=0;i<pcis->n;i++) {
3508       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3509     }
3510     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3511     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3512     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3513     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3514     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3515     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3516     if (pcbddc->dbg_flag > 1) {
3517       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3518       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3519       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3520       for (i=0;i<pcbddc->local_primal_size;i++) {
3521         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3522       }
3523       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3524     }
3525     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3526   }
3527   /* get back data */
3528   *coarse_size_n = coarse_size;
3529   *local_primal_indices_n = local_primal_indices;
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "PCBDDCGlobalToLocal"
3535 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3536 {
3537   PC_IS*         pcis = (PC_IS*)pc->data;
3538   IS             localis_t;
3539   PetscInt       i,lsize,*idxs;
3540   PetscScalar    *vals;
3541   PetscErrorCode ierr;
3542 
3543   PetscFunctionBegin;
3544   /* get dirichlet indices in local ordering exploiting local to global map */
3545   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3546   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3547   for (i=0;i<lsize;i++) vals[i] = 1.0;
3548   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3549   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3550   if (idxs) { /* multilevel guard */
3551     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3552   }
3553   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3554   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3555   ierr = PetscFree(vals);CHKERRQ(ierr);
3556   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3557   /* now compute dirichlet set in local ordering */
3558   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3559   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3560   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3561   for (i=0,lsize=0;i<pcis->n;i++) {
3562     if (vals[i] == 1.0) {
3563       lsize++;
3564     }
3565   }
3566   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3567   for (i=0,lsize=0;i<pcis->n;i++) {
3568     if (vals[i] == 1.0) {
3569       idxs[lsize++] = i;
3570     }
3571   }
3572   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3573   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3574   *localis = localis_t;
3575   PetscFunctionReturn(0);
3576 }
3577