xref: /petsc/src/ksp/pc/impls/bddc/bddcprivate.c (revision b23d619e14951cdf40e81ee09fccde31aaba70a6)
1 #include "bddc.h"
2 #include "bddcprivate.h"
3 #include <petscblaslapack.h>
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "PCBDDCSetUpSolvers"
7 PetscErrorCode PCBDDCSetUpSolvers(PC pc)
8 {
9   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
10   PetscScalar    *coarse_submat_vals;
11   PetscErrorCode ierr;
12 
13   PetscFunctionBegin;
14   /* Compute matrix after change of basis and extract local submatrices */
15   ierr = PCBDDCSetUpLocalMatrices(pc);CHKERRQ(ierr);
16 
17   /* Setup local scatters R_to_B and (optionally) R_to_D */
18   /* PCBDDCSetUpLocalWorkVectors and PCBDDCSetUpLocalMatrices should be called first! */
19   ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr);
20 
21   /* Setup local solvers ksp_D and ksp_R */
22   /* PCBDDCSetUpLocalScatters should be called first! */
23   ierr = PCBDDCSetUpLocalSolvers(pc);CHKERRQ(ierr);
24 
25   /* Change global null space passed in by the user if change of basis has been requested */
26   if (pcbddc->NullSpace && pcbddc->use_change_of_basis) {
27     ierr = PCBDDCNullSpaceAdaptGlobal(pc);CHKERRQ(ierr);
28   }
29 
30   /*
31      Setup local correction and local part of coarse basis.
32      Gives back the dense local part of the coarse matrix in column major ordering
33   */
34   ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr);
35 
36   /* Compute total number of coarse nodes and setup coarse solver */
37   ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr);
38 
39   /* free */
40   ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr);
41   PetscFunctionReturn(0);
42 }
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "PCBDDCResetCustomization"
46 PetscErrorCode PCBDDCResetCustomization(PC pc)
47 {
48   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
49   PetscErrorCode ierr;
50 
51   PetscFunctionBegin;
52   ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
53   ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr);
54   ierr = MatNullSpaceDestroy(&pcbddc->NullSpace);CHKERRQ(ierr);
55   ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr);
56   ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
57   ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr);
58   ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
59   ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
60   ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
61   ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr);
62   ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "PCBDDCResetTopography"
68 PetscErrorCode PCBDDCResetTopography(PC pc)
69 {
70   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
75   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
76   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "PCBDDCResetSolvers"
82 PetscErrorCode PCBDDCResetSolvers(PC pc)
83 {
84   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
85   PetscErrorCode ierr;
86 
87   PetscFunctionBegin;
88   ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
89   ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
90   ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
91   ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
92   ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
93   ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
94   ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
95   ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
96   ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
97   ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
98   ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
99   ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
100   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
101   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
102   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
103   ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
104   ierr = KSPDestroy(&pcbddc->ksp_D);CHKERRQ(ierr);
105   ierr = KSPDestroy(&pcbddc->ksp_R);CHKERRQ(ierr);
106   ierr = KSPDestroy(&pcbddc->coarse_ksp);CHKERRQ(ierr);
107   ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
108   ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
109   ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
110   ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
111   PetscFunctionReturn(0);
112 }
113 
114 #undef __FUNCT__
115 #define __FUNCT__ "PCBDDCSetUpLocalWorkVectors"
116 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
117 {
118   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
119   PC_IS          *pcis = (PC_IS*)pc->data;
120   VecType        impVecType;
121   PetscInt       n_constraints,n_R,old_size;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   if (!pcbddc->ConstraintMatrix) {
126     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Constraint matrix has not been created");
127   }
128   /* get sizes */
129   n_constraints = pcbddc->local_primal_size - pcbddc->n_actual_vertices;
130   n_R = pcis->n-pcbddc->n_actual_vertices;
131   ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr);
132   /* local work vectors (try to avoid unneeded work)*/
133   /* R nodes */
134   old_size = -1;
135   if (pcbddc->vec1_R) {
136     ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr);
137   }
138   if (n_R != old_size) {
139     ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr);
140     ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr);
141     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr);
142     ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr);
143     ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr);
144     ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr);
145   }
146   /* local primal dofs */
147   old_size = -1;
148   if (pcbddc->vec1_P) {
149     ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr);
150   }
151   if (pcbddc->local_primal_size != old_size) {
152     ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr);
153     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr);
154     ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr);
155     ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr);
156   }
157   /* local explicit constraints */
158   old_size = -1;
159   if (pcbddc->vec1_C) {
160     ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr);
161   }
162   if (n_constraints && n_constraints != old_size) {
163     ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr);
164     ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr);
165     ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr);
166     ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCBDDCSetUpCorrection"
173 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
174 {
175   PetscErrorCode         ierr;
176   /* pointers to pcis and pcbddc */
177   PC_IS*                 pcis = (PC_IS*)pc->data;
178   PC_BDDC*               pcbddc = (PC_BDDC*)pc->data;
179   /* submatrices of local problem */
180   Mat                    A_RV,A_VR,A_VV;
181   /* working matrices */
182   Mat                    M1,M2,M3,C_CR;
183   /* working vectors */
184   Vec                    vec1_C,vec2_C,vec1_V,vec2_V;
185   /* additional working stuff */
186   IS                     is_aux;
187   PetscScalar            *coarse_submat_vals; /* TODO: use a PETSc matrix */
188   const PetscScalar      *array,*row_cmat_values;
189   const PetscInt         *row_cmat_indices,*idx_R_local;
190   PetscInt               *idx_V_B,*auxindices;
191   PetscInt               n_vertices,n_constraints,size_of_constraint;
192   PetscInt               i,j,n_R,n_D,n_B;
193   PetscBool              setsym=PETSC_FALSE,issym=PETSC_FALSE;
194   /* matrix type (vector type propagated downstream from vec1_C and local matrix type) */
195   MatType                impMatType;
196   /* some shortcuts to scalars */
197   PetscScalar            zero=0.0,one=1.0,m_one=-1.0;
198   /* for debugging purposes */
199   PetscReal              *coarsefunctions_errors,*constraints_errors;
200 
201   PetscFunctionBegin;
202   /* get number of vertices (corners plus constraints with change of basis)
203      pcbddc->n_actual_vertices stores the actual number of vertices, pcbddc->n_vertices the number of corners computed */
204   n_vertices = pcbddc->n_actual_vertices;
205   n_constraints = pcbddc->local_primal_size-n_vertices;
206   /* Set Non-overlapping dimensions */
207   n_B = pcis->n_B; n_D = pcis->n - n_B;
208   n_R = pcis->n-n_vertices;
209 
210   /* Set types for local objects needed by BDDC precondtioner */
211   impMatType = MATSEQDENSE;
212 
213   /* Allocating some extra storage just to be safe */
214   ierr = PetscMalloc (pcis->n*sizeof(PetscInt),&auxindices);CHKERRQ(ierr);
215   for (i=0;i<pcis->n;i++) auxindices[i]=i;
216 
217   /* vertices in boundary numbering */
218   ierr = PetscMalloc(n_vertices*sizeof(PetscInt),&idx_V_B);CHKERRQ(ierr);
219   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->primal_indices_local_idxs,&i,idx_V_B);CHKERRQ(ierr);
220   if (i != n_vertices) {
221     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",n_vertices,i);
222   }
223 
224   /* Precompute stuffs needed for preprocessing and application of BDDC*/
225   if (n_constraints) {
226     /* see if we can save some allocations */
227     if (pcbddc->local_auxmat2) {
228       PetscInt on_R,on_constraints;
229       ierr = MatGetSize(pcbddc->local_auxmat2,&on_R,&on_constraints);CHKERRQ(ierr);
230       if (on_R != n_R || on_constraints != n_constraints) {
231         ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr);
232         ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr);
233       }
234     }
235     /* work vectors */
236     ierr = VecDuplicate(pcbddc->vec1_C,&vec1_C);CHKERRQ(ierr);
237     ierr = VecDuplicate(pcbddc->vec1_C,&vec2_C);CHKERRQ(ierr);
238     /* auxiliary matrices */
239     if (!pcbddc->local_auxmat2) {
240       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->local_auxmat2);CHKERRQ(ierr);
241       ierr = MatSetSizes(pcbddc->local_auxmat2,n_R,n_constraints,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
242       ierr = MatSetType(pcbddc->local_auxmat2,impMatType);CHKERRQ(ierr);
243       ierr = MatSetUp(pcbddc->local_auxmat2);CHKERRQ(ierr);
244     }
245 
246     /* Extract constraints on R nodes: C_{CR}  */
247     ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr);
248     ierr = MatGetSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr);
249     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
250 
251     /* Assemble local_auxmat2 = - A_{RR}^{-1} C^T_{CR} needed by BDDC application */
252     for (i=0;i<n_constraints;i++) {
253       ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
254       /* Get row of constraint matrix in R numbering */
255       ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
256       ierr = VecSetValues(pcbddc->vec1_R,size_of_constraint,row_cmat_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
257       ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr);
258       ierr = VecAssemblyBegin(pcbddc->vec1_R);CHKERRQ(ierr);
259       ierr = VecAssemblyEnd(pcbddc->vec1_R);CHKERRQ(ierr);
260       /* Solve for row of constraint matrix in R numbering */
261       ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
262       /* Set values in local_auxmat2 */
263       ierr = VecGetArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
264       ierr = MatSetValues(pcbddc->local_auxmat2,n_R,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
265       ierr = VecRestoreArrayRead(pcbddc->vec2_R,&array);CHKERRQ(ierr);
266     }
267     ierr = MatAssemblyBegin(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
268     ierr = MatAssemblyEnd(pcbddc->local_auxmat2,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
269     ierr = MatScale(pcbddc->local_auxmat2,m_one);CHKERRQ(ierr);
270 
271     /* Assemble explicitly M1 = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} needed in preproc  */
272     ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr);
273     ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr);
274     ierr = MatCreate(PETSC_COMM_SELF,&M1);CHKERRQ(ierr);
275     ierr = MatSetSizes(M1,n_constraints,n_constraints,n_constraints,n_constraints);CHKERRQ(ierr);
276     ierr = MatSetType(M1,impMatType);CHKERRQ(ierr);
277     ierr = MatSetUp(M1);CHKERRQ(ierr);
278     ierr = MatDuplicate(M1,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr);
279     ierr = MatZeroEntries(M2);CHKERRQ(ierr);
280     ierr = VecSet(vec1_C,m_one);CHKERRQ(ierr);
281     ierr = MatDiagonalSet(M2,vec1_C,INSERT_VALUES);CHKERRQ(ierr);
282     ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr);
283     ierr = MatDestroy(&M2);CHKERRQ(ierr);
284     ierr = MatDestroy(&M3);CHKERRQ(ierr);
285     /* Assemble local_auxmat1 = M1*C_{CR} needed by BDDC application in KSP and in preproc */
286     if (!pcbddc->local_auxmat1) {
287       ierr = MatMatMult(M1,C_CR,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
288     } else {
289       ierr = MatMatMult(M1,C_CR,MAT_REUSE_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr);
290     }
291   }
292 
293   /* Get submatrices from subdomain matrix */
294   if (n_vertices) {
295     PetscInt ibs,mbs;
296     PetscBool issbaij;
297     Mat newmat;
298 
299     ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr);
300     ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
301     ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
302     if (ibs != mbs) { /* need to convert to SEQAIJ */
303       ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
304       ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
305       ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
306       ierr = MatGetSubMatrix(newmat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
307       ierr = MatDestroy(&newmat);CHKERRQ(ierr);
308     } else {
309       /* this is safe */
310       ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr);
311       ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
312       if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
313         ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
314         /* which of the two approaches is faster? */
315         /* ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
316         ierr = MatCreateTranspose(A_RV,&A_VR);CHKERRQ(ierr);*/
317         ierr = MatGetSubMatrix(newmat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
318         ierr = MatCreateTranspose(A_VR,&A_RV);CHKERRQ(ierr);
319         ierr = MatDestroy(&newmat);CHKERRQ(ierr);
320       } else {
321         ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr);
322         ierr = MatGetSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr);
323       }
324     }
325     ierr = MatGetVecs(A_RV,&vec1_V,NULL);CHKERRQ(ierr);
326     ierr = VecDuplicate(vec1_V,&vec2_V);CHKERRQ(ierr);
327     ierr = ISDestroy(&is_aux);CHKERRQ(ierr);
328   }
329 
330   /* Matrix of coarse basis functions (local) */
331   if (pcbddc->coarse_phi_B) {
332     PetscInt on_B,on_primal;
333     ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr);
334     if (on_B != n_B || on_primal != pcbddc->local_primal_size) {
335       ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr);
336       ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr);
337     }
338   }
339   if (pcbddc->coarse_phi_D) {
340     PetscInt on_D,on_primal;
341     ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,&on_primal);CHKERRQ(ierr);
342     if (on_D != n_D || on_primal != pcbddc->local_primal_size) {
343       ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr);
344       ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr);
345     }
346   }
347   if (!pcbddc->coarse_phi_B) {
348     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_B);CHKERRQ(ierr);
349     ierr = MatSetSizes(pcbddc->coarse_phi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
350     ierr = MatSetType(pcbddc->coarse_phi_B,impMatType);CHKERRQ(ierr);
351     ierr = MatSetUp(pcbddc->coarse_phi_B);CHKERRQ(ierr);
352   }
353   if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_phi_D ) {
354     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_phi_D);CHKERRQ(ierr);
355     ierr = MatSetSizes(pcbddc->coarse_phi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
356     ierr = MatSetType(pcbddc->coarse_phi_D,impMatType);CHKERRQ(ierr);
357     ierr = MatSetUp(pcbddc->coarse_phi_D);CHKERRQ(ierr);
358   }
359 
360   if (pcbddc->dbg_flag) {
361     ierr = ISGetIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
362     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*coarsefunctions_errors),&coarsefunctions_errors);CHKERRQ(ierr);
363     ierr = PetscMalloc(2*pcbddc->local_primal_size*sizeof(*constraints_errors),&constraints_errors);CHKERRQ(ierr);
364   }
365   /* Subdomain contribution (Non-overlapping) to coarse matrix  */
366   ierr = PetscMalloc((pcbddc->local_primal_size)*(pcbddc->local_primal_size)*sizeof(PetscScalar),&coarse_submat_vals);CHKERRQ(ierr);
367 
368   /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
369 
370   /* vertices */
371   for (i=0;i<n_vertices;i++) {
372     /* this should not be needed, but MatMult_BAIJ is broken when using compressed row routines */
373     ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr); /* TODO: REMOVE IT */
374     ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
375     ierr = VecSetValue(vec1_V,i,one,INSERT_VALUES);CHKERRQ(ierr);
376     ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
377     ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
378     /* simplified solution of saddle point problem with null rhs on constraints multipliers */
379     ierr = MatMult(A_RV,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
380     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
381     ierr = VecScale(pcbddc->vec1_R,m_one);CHKERRQ(ierr);
382     if (n_constraints) {
383       ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec1_R,vec1_C);CHKERRQ(ierr);
384       ierr = MatMultAdd(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
385       ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
386     }
387     ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
388     ierr = MatMultAdd(A_VV,vec1_V,vec2_V,vec2_V);CHKERRQ(ierr);
389 
390     /* Set values in coarse basis function and subdomain part of coarse_mat */
391     /* coarse basis functions */
392     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
393     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
394     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
395     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
396     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
397     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
398     ierr = MatSetValue(pcbddc->coarse_phi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
399     if (pcbddc->switch_static || pcbddc->dbg_flag) {
400       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
401       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
402       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
403       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
404       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
405     }
406     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
407     ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
408     ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
409     ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
410     if (n_constraints) {
411       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
412       ierr = PetscMemcpy(&coarse_submat_vals[i*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
413       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
414     }
415 
416     /* check */
417     if (pcbddc->dbg_flag) {
418       /* assemble subdomain vector on local nodes */
419       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
420       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
421       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
422       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
423       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
424       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
425       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
426       /* assemble subdomain vector of lagrange multipliers (i.e. primal nodes) */
427       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
428       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
429       ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
430       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
431       if (n_constraints) {
432         ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
433         ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
434         ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
435       }
436       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
437       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
438       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
439       /* check saddle point solution */
440       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
441       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
442       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i]);CHKERRQ(ierr);
443       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
444       /* shift by the identity matrix */
445       ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
446       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
447       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
448       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i]);CHKERRQ(ierr);
449     }
450   }
451 
452   /* constraints */
453   for (i=0;i<n_constraints;i++) {
454     ierr = VecSet(vec2_C,zero);CHKERRQ(ierr);
455     ierr = VecSetValue(vec2_C,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
456     ierr = VecAssemblyBegin(vec2_C);CHKERRQ(ierr);
457     ierr = VecAssemblyEnd(vec2_C);CHKERRQ(ierr);
458     /* simplified solution of saddle point problem with null rhs on vertices multipliers */
459     ierr = MatMult(M1,vec2_C,vec1_C);CHKERRQ(ierr);
460     ierr = MatMult(pcbddc->local_auxmat2,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
461     ierr = VecScale(vec1_C,m_one);CHKERRQ(ierr);
462     if (n_vertices) {
463       ierr = MatMult(A_VR,pcbddc->vec1_R,vec2_V);CHKERRQ(ierr);
464     }
465     /* Set values in coarse basis function and subdomain part of coarse_mat */
466     /* coarse basis functions */
467     j = i+n_vertices; /* don't touch this! */
468     ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
469     ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
470     ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
471     ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
472     ierr = MatSetValues(pcbddc->coarse_phi_B,n_B,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
473     ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
474     if (pcbddc->switch_static || pcbddc->dbg_flag) {
475       ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
476       ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
477       ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
478       ierr = MatSetValues(pcbddc->coarse_phi_D,n_D,auxindices,1,&j,array,INSERT_VALUES);CHKERRQ(ierr);
479       ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
480     }
481     /* subdomain contribution to coarse matrix. WARNING -> column major ordering */
482     if (n_vertices) {
483       ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
484       ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size],array,n_vertices*sizeof(PetscScalar));CHKERRQ(ierr);
485       ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
486     }
487     ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
488     ierr = PetscMemcpy(&coarse_submat_vals[j*pcbddc->local_primal_size+n_vertices],array,n_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
489     ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
490 
491     if (pcbddc->dbg_flag) {
492       /* assemble subdomain vector on nodes */
493       ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
494       ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
495       ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
496       ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
497       ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
498       ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
499       /* assemble subdomain vector of lagrange multipliers */
500       ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr);
501       if (n_vertices) {
502         ierr = VecGetArrayRead(vec2_V,&array);CHKERRQ(ierr);
503         ierr = VecSetValues(pcbddc->vec1_P,n_vertices,auxindices,array,INSERT_VALUES);CHKERRQ(ierr);
504         ierr = VecRestoreArrayRead(vec2_V,&array);CHKERRQ(ierr);
505       }
506       ierr = VecGetArrayRead(vec1_C,&array);CHKERRQ(ierr);
507       ierr = VecSetValues(pcbddc->vec1_P,n_constraints,&auxindices[n_vertices],array,INSERT_VALUES);CHKERRQ(ierr);
508       ierr = VecRestoreArrayRead(vec1_C,&array);CHKERRQ(ierr);
509       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
510       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
511       ierr = VecScale(pcbddc->vec1_P,m_one);CHKERRQ(ierr);
512       /* check saddle point solution */
513       ierr = MatMult(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
514       ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
515       ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[j]);CHKERRQ(ierr);
516       ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
517       /* shift by the identity matrix */
518       ierr = VecSetValue(pcbddc->vec1_P,j,m_one,ADD_VALUES);CHKERRQ(ierr);
519       ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
520       ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
521       ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[j]);CHKERRQ(ierr);
522     }
523   }
524   /* call assembling routines for local coarse basis */
525   ierr = MatAssemblyBegin(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   ierr = MatAssemblyEnd(pcbddc->coarse_phi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527   if (pcbddc->switch_static || pcbddc->dbg_flag) {
528     ierr = MatAssemblyBegin(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
529     ierr = MatAssemblyEnd(pcbddc->coarse_phi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
530   }
531 
532   /* compute other basis functions for non-symmetric problems */
533   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
534   if (!setsym || (setsym && !issym)) {
535     if (!pcbddc->coarse_psi_B) {
536       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_B);CHKERRQ(ierr);
537       ierr = MatSetSizes(pcbddc->coarse_psi_B,n_B,pcbddc->local_primal_size,n_B,pcbddc->local_primal_size);CHKERRQ(ierr);
538       ierr = MatSetType(pcbddc->coarse_psi_B,impMatType);CHKERRQ(ierr);
539       ierr = MatSetUp(pcbddc->coarse_psi_B);CHKERRQ(ierr);
540     }
541     if ( (pcbddc->switch_static || pcbddc->dbg_flag) && !pcbddc->coarse_psi_D) {
542       ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->coarse_psi_D);CHKERRQ(ierr);
543       ierr = MatSetSizes(pcbddc->coarse_psi_D,n_D,pcbddc->local_primal_size,n_D,pcbddc->local_primal_size);CHKERRQ(ierr);
544       ierr = MatSetType(pcbddc->coarse_psi_D,impMatType);CHKERRQ(ierr);
545       ierr = MatSetUp(pcbddc->coarse_psi_D);CHKERRQ(ierr);
546     }
547     for (i=0;i<pcbddc->local_primal_size;i++) {
548       if (n_constraints) {
549         ierr = VecSet(vec1_C,zero);CHKERRQ(ierr);
550         for (j=0;j<n_constraints;j++) {
551           ierr = VecSetValue(vec1_C,j,coarse_submat_vals[(j+n_vertices)*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
552         }
553         ierr = VecAssemblyBegin(vec1_C);CHKERRQ(ierr);
554         ierr = VecAssemblyEnd(vec1_C);CHKERRQ(ierr);
555       }
556       if (i<n_vertices) {
557         ierr = VecSet(vec1_V,zero);CHKERRQ(ierr);
558         ierr = VecSetValue(vec1_V,i,m_one,INSERT_VALUES);CHKERRQ(ierr);
559         ierr = VecAssemblyBegin(vec1_V);CHKERRQ(ierr);
560         ierr = VecAssemblyEnd(vec1_V);CHKERRQ(ierr);
561         ierr = MatMultTranspose(A_VR,vec1_V,pcbddc->vec1_R);CHKERRQ(ierr);
562         if (n_constraints) {
563           ierr = MatMultTransposeAdd(C_CR,vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
564         }
565       } else {
566         ierr = MatMultTranspose(C_CR,vec1_C,pcbddc->vec1_R);CHKERRQ(ierr);
567       }
568       ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr);
569       ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
570       ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571       ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572       ierr = VecGetArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
573       ierr = MatSetValues(pcbddc->coarse_psi_B,n_B,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
574       ierr = VecRestoreArrayRead(pcis->vec1_B,&array);CHKERRQ(ierr);
575       if (i<n_vertices) {
576         ierr = MatSetValue(pcbddc->coarse_psi_B,idx_V_B[i],i,one,INSERT_VALUES);CHKERRQ(ierr);
577       }
578       if (pcbddc->switch_static || pcbddc->dbg_flag) {
579         ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
580         ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
581         ierr = VecGetArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
582         ierr = MatSetValues(pcbddc->coarse_psi_D,n_D,auxindices,1,&i,array,INSERT_VALUES);CHKERRQ(ierr);
583         ierr = VecRestoreArrayRead(pcis->vec1_D,&array);CHKERRQ(ierr);
584       }
585 
586       if (pcbddc->dbg_flag) {
587         /* assemble subdomain vector on nodes */
588         ierr = VecSet(pcis->vec1_N,zero);CHKERRQ(ierr);
589         ierr = VecGetArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
590         ierr = VecSetValues(pcis->vec1_N,n_R,idx_R_local,array,INSERT_VALUES);CHKERRQ(ierr);
591         ierr = VecRestoreArrayRead(pcbddc->vec1_R,&array);CHKERRQ(ierr);
592         if (i<n_vertices) {
593           ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],one,INSERT_VALUES);CHKERRQ(ierr);
594         }
595         ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
596         ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
597         /* assemble subdomain vector of lagrange multipliers */
598         for (j=0;j<pcbddc->local_primal_size;j++) {
599           ierr = VecSetValue(pcbddc->vec1_P,j,-coarse_submat_vals[j*pcbddc->local_primal_size+i],INSERT_VALUES);CHKERRQ(ierr);
600         }
601         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
602         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
603         /* check saddle point solution */
604         ierr = MatMultTranspose(pcbddc->local_mat,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr);
605         ierr = MatMultTransposeAdd(pcbddc->ConstraintMatrix,pcbddc->vec1_P,pcis->vec2_N,pcis->vec2_N);CHKERRQ(ierr);
606         ierr = VecNorm(pcis->vec2_N,NORM_INFINITY,&coarsefunctions_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
607         ierr = MatMult(pcbddc->ConstraintMatrix,pcis->vec1_N,pcbddc->vec1_P);CHKERRQ(ierr);
608         /* shift by the identity matrix */
609         ierr = VecSetValue(pcbddc->vec1_P,i,m_one,ADD_VALUES);CHKERRQ(ierr);
610         ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr);
611         ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr);
612         ierr = VecNorm(pcbddc->vec1_P,NORM_INFINITY,&constraints_errors[i+pcbddc->local_primal_size]);CHKERRQ(ierr);
613       }
614     }
615     ierr = MatAssemblyBegin(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
616     ierr = MatAssemblyEnd(pcbddc->coarse_psi_B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
617     if (pcbddc->switch_static || pcbddc->dbg_flag) {
618       ierr = MatAssemblyBegin(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
619       ierr = MatAssemblyEnd(pcbddc->coarse_psi_D,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
620     }
621   }
622   ierr = PetscFree(idx_V_B);CHKERRQ(ierr);
623   /* Checking coarse_sub_mat and coarse basis functios */
624   /* Symmetric case     : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
625   /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
626   if (pcbddc->dbg_flag) {
627     Mat         coarse_sub_mat;
628     Mat         AUXMAT,TM1,TM2,TM3,TM4;
629     Mat         coarse_phi_D,coarse_phi_B;
630     Mat         coarse_psi_D,coarse_psi_B;
631     Mat         A_II,A_BB,A_IB,A_BI;
632     MatType     checkmattype=MATSEQAIJ;
633     PetscReal   real_value;
634 
635     ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr);
636     ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr);
637     ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr);
638     ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr);
639     ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr);
640     ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr);
641     if (pcbddc->coarse_psi_B) {
642       ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr);
643       ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr);
644     }
645     ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr);
646 
647     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
648     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat and local basis functions\n");CHKERRQ(ierr);
649     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
650     if (pcbddc->coarse_psi_B) {
651       ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
652       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
653       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
654       ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
655       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
656       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
657       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
658       ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
659       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
660       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
661       ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
662       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
663     } else {
664       ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr);
665       ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr);
666       ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
667       ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr);
668       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
669       ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr);
670       ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr);
671       ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr);
672     }
673     ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
674     ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
675     ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
676     ierr = MatConvert(TM1,MATSEQDENSE,MAT_REUSE_MATRIX,&TM1);CHKERRQ(ierr);
677     ierr = MatAXPY(TM1,m_one,coarse_sub_mat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
678     ierr = MatNorm(TM1,NORM_INFINITY,&real_value);CHKERRQ(ierr);
679     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
680     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"----------------------------------\n");CHKERRQ(ierr);
681     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d \n",PetscGlobalRank);CHKERRQ(ierr);
682     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"matrix error = % 1.14e\n",real_value);CHKERRQ(ierr);
683     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (phi) errors\n");CHKERRQ(ierr);
684     for (i=0;i<pcbddc->local_primal_size;i++) {
685       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,coarsefunctions_errors[i]);CHKERRQ(ierr);
686     }
687     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (phi) errors\n");CHKERRQ(ierr);
688     for (i=0;i<pcbddc->local_primal_size;i++) {
689       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i,constraints_errors[i]);CHKERRQ(ierr);
690     }
691     if (pcbddc->coarse_psi_B) {
692       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"coarse functions (psi) errors\n");CHKERRQ(ierr);
693       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
694         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,coarsefunctions_errors[i]);CHKERRQ(ierr);
695       }
696       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"constraints (psi) errors\n");CHKERRQ(ierr);
697       for (i=pcbddc->local_primal_size;i<2*pcbddc->local_primal_size;i++) {
698         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local %02d-th function error = % 1.14e\n",i-pcbddc->local_primal_size,constraints_errors[i]);CHKERRQ(ierr);
699       }
700     }
701     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
702     ierr = MatDestroy(&A_II);CHKERRQ(ierr);
703     ierr = MatDestroy(&A_BB);CHKERRQ(ierr);
704     ierr = MatDestroy(&A_IB);CHKERRQ(ierr);
705     ierr = MatDestroy(&A_BI);CHKERRQ(ierr);
706     ierr = MatDestroy(&TM1);CHKERRQ(ierr);
707     ierr = MatDestroy(&TM2);CHKERRQ(ierr);
708     ierr = MatDestroy(&TM3);CHKERRQ(ierr);
709     ierr = MatDestroy(&TM4);CHKERRQ(ierr);
710     ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr);
711     ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr);
712     if (pcbddc->coarse_psi_B) {
713       ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr);
714       ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr);
715     }
716     ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr);
717     ierr = ISRestoreIndices(pcbddc->is_R_local,&idx_R_local);CHKERRQ(ierr);
718     ierr = PetscFree(coarsefunctions_errors);CHKERRQ(ierr);
719     ierr = PetscFree(constraints_errors);CHKERRQ(ierr);
720   }
721   /* free memory */
722   if (n_vertices) {
723     ierr = VecDestroy(&vec1_V);CHKERRQ(ierr);
724     ierr = VecDestroy(&vec2_V);CHKERRQ(ierr);
725     ierr = MatDestroy(&A_RV);CHKERRQ(ierr);
726     ierr = MatDestroy(&A_VR);CHKERRQ(ierr);
727     ierr = MatDestroy(&A_VV);CHKERRQ(ierr);
728   }
729   if (n_constraints) {
730     ierr = VecDestroy(&vec1_C);CHKERRQ(ierr);
731     ierr = VecDestroy(&vec2_C);CHKERRQ(ierr);
732     ierr = MatDestroy(&M1);CHKERRQ(ierr);
733     ierr = MatDestroy(&C_CR);CHKERRQ(ierr);
734   }
735   ierr = PetscFree(auxindices);CHKERRQ(ierr);
736   /* get back data */
737   *coarse_submat_vals_n = coarse_submat_vals;
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "PCBDDCSetUpLocalMatrices"
743 PetscErrorCode PCBDDCSetUpLocalMatrices(PC pc)
744 {
745   PC_IS*            pcis = (PC_IS*)(pc->data);
746   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
747   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
748   PetscBool         issbaij,isbaij;
749   /* manage repeated solves */
750   MatReuse          reuse;
751   MatStructure      matstruct;
752   PetscErrorCode    ierr;
753 
754   PetscFunctionBegin;
755   if (pcbddc->use_change_of_basis && !pcbddc->ChangeOfBasisMatrix) {
756     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Change of basis matrix has not been created");
757   }
758   /* get mat flags */
759   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
760   reuse = MAT_INITIAL_MATRIX;
761   if (pc->setupcalled) {
762     /* when matstruct is SAME_PRECONDITIONER, we shouldn't be here */
763     if (matstruct == SAME_PRECONDITIONER) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen");
764     if (matstruct == SAME_NONZERO_PATTERN) {
765       reuse = MAT_REUSE_MATRIX;
766     } else {
767       reuse = MAT_INITIAL_MATRIX;
768     }
769   }
770   if (reuse == MAT_INITIAL_MATRIX) {
771     ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr);
772     ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr);
773     ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr);
774     ierr = MatDestroy(&pcis->A_BB);CHKERRQ(ierr);
775     ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr);
776   }
777 
778   /* transform local matrices if needed */
779   if (pcbddc->use_change_of_basis) {
780     Mat         change_mat_all;
781     PetscScalar *row_cmat_values;
782     PetscInt    *row_cmat_indices;
783     PetscInt    *nnz,*is_indices,*temp_indices;
784     PetscInt    i,j,k,n_D,n_B;
785 
786     /* Get Non-overlapping dimensions */
787     n_B = pcis->n_B;
788     n_D = pcis->n-n_B;
789 
790     /* compute nonzero structure of change of basis on all local nodes */
791     ierr = PetscMalloc(pcis->n*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
792     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
793     for (i=0;i<n_D;i++) nnz[is_indices[i]] = 1;
794     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
795     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
796     k=1;
797     for (i=0;i<n_B;i++) {
798       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
799       nnz[is_indices[i]]=j;
800       if (k < j) k = j;
801       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,NULL,NULL);CHKERRQ(ierr);
802     }
803     ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
804     /* assemble change of basis matrix on the whole set of local dofs */
805     ierr = PetscMalloc(k*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
806     ierr = MatCreate(PETSC_COMM_SELF,&change_mat_all);CHKERRQ(ierr);
807     ierr = MatSetSizes(change_mat_all,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr);
808     ierr = MatSetType(change_mat_all,MATSEQAIJ);CHKERRQ(ierr);
809     ierr = MatSeqAIJSetPreallocation(change_mat_all,0,nnz);CHKERRQ(ierr);
810     ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
811     for (i=0;i<n_D;i++) {
812       ierr = MatSetValue(change_mat_all,is_indices[i],is_indices[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
813     }
814     ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
815     ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
816     for (i=0;i<n_B;i++) {
817       ierr = MatGetRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
818       for (k=0; k<j; k++) temp_indices[k]=is_indices[row_cmat_indices[k]];
819       ierr = MatSetValues(change_mat_all,1,&is_indices[i],j,temp_indices,row_cmat_values,INSERT_VALUES);CHKERRQ(ierr);
820       ierr = MatRestoreRow(pcbddc->ChangeOfBasisMatrix,i,&j,(const PetscInt**)&row_cmat_indices,(const PetscScalar**)&row_cmat_values);CHKERRQ(ierr);
821     }
822     ierr = MatAssemblyBegin(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(change_mat_all,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     /* TODO: HOW TO WORK WITH BAIJ and SBAIJ? PtAP not provided */
825     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
826     ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQBAIJ,&isbaij);CHKERRQ(ierr);
827     if (!issbaij && !isbaij) {
828       ierr = MatPtAP(matis->A,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
829     } else {
830       Mat work_mat;
831       ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr);
832       ierr = MatPtAP(work_mat,change_mat_all,reuse,2.0,&pcbddc->local_mat);CHKERRQ(ierr);
833       ierr = MatDestroy(&work_mat);CHKERRQ(ierr);
834     }
835     /*
836     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
837     ierr = MatView(change_mat_all,(PetscViewer)0);CHKERRQ(ierr);
838     */
839     ierr = MatDestroy(&change_mat_all);CHKERRQ(ierr);
840     ierr = PetscFree(nnz);CHKERRQ(ierr);
841     ierr = PetscFree(temp_indices);CHKERRQ(ierr);
842   } else {
843     /* without change of basis, the local matrix is unchanged */
844     if (!pcbddc->local_mat) {
845       ierr = PetscObjectReference((PetscObject)matis->A);CHKERRQ(ierr);
846       pcbddc->local_mat = matis->A;
847     }
848   }
849 
850   /* get submatrices */
851   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_I_local,reuse,&pcis->A_II);CHKERRQ(ierr);
852   ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_B_local,reuse,&pcis->A_BB);CHKERRQ(ierr);
853   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
854   if (!issbaij) {
855     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
856     ierr = MatGetSubMatrix(pcbddc->local_mat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
857   } else {
858     Mat newmat;
859     ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
860     ierr = MatGetSubMatrix(newmat,pcis->is_I_local,pcis->is_B_local,reuse,&pcis->A_IB);CHKERRQ(ierr);
861     ierr = MatGetSubMatrix(newmat,pcis->is_B_local,pcis->is_I_local,reuse,&pcis->A_BI);CHKERRQ(ierr);
862     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "PCBDDCSetUpLocalScatters"
869 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
870 {
871   PC_IS*         pcis = (PC_IS*)(pc->data);
872   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
873   IS             is_aux1,is_aux2;
874   PetscInt       *aux_array1,*aux_array2,*is_indices,*idx_R_local;
875   PetscInt       n_vertices,i,j,n_R,n_D,n_B;
876   PetscInt       vbs,bs;
877   PetscBT        bitmask;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   /*
882     No need to setup local scatters if
883       - primal space is unchanged
884         AND
885       - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
886         AND
887       - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
888   */
889   if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
890     PetscFunctionReturn(0);
891   }
892   /* destroy old objects */
893   ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr);
894   ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr);
895   ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr);
896   /* Set Non-overlapping dimensions */
897   n_B = pcis->n_B; n_D = pcis->n - n_B;
898   n_vertices = pcbddc->n_actual_vertices;
899   /* create auxiliary bitmask */
900   ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr);
901   for (i=0;i<n_vertices;i++) {
902     ierr = PetscBTSet(bitmask,pcbddc->primal_indices_local_idxs[i]);CHKERRQ(ierr);
903   }
904 
905   /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
906   ierr = PetscMalloc((pcis->n-n_vertices)*sizeof(PetscInt),&idx_R_local);CHKERRQ(ierr);
907   for (i=0, n_R=0; i<pcis->n; i++) {
908     if (!PetscBTLookup(bitmask,i)) {
909       idx_R_local[n_R] = i;
910       n_R++;
911     }
912   }
913 
914   /* Block code */
915   vbs = 1;
916   ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr);
917   if (bs>1 && !(n_vertices%bs)) {
918     PetscBool is_blocked = PETSC_TRUE;
919     PetscInt  *vary;
920     /* Verify if the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
921     ierr = PetscMalloc(pcis->n/bs*sizeof(PetscInt),&vary);CHKERRQ(ierr);
922     ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr);
923     for (i=0; i<n_vertices; i++) vary[pcbddc->primal_indices_local_idxs[i]/bs]++;
924     for (i=0; i<n_vertices; i++) {
925       if (vary[i]!=0 && vary[i]!=bs) {
926         is_blocked = PETSC_FALSE;
927         break;
928       }
929     }
930     if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
931       vbs = bs;
932       for (i=0;i<n_R/vbs;i++) {
933         idx_R_local[i] = idx_R_local[vbs*i]/vbs;
934       }
935     }
936     ierr = PetscFree(vary);CHKERRQ(ierr);
937   }
938   ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr);
939   ierr = PetscFree(idx_R_local);CHKERRQ(ierr);
940 
941   /* print some info if requested */
942   if (pcbddc->dbg_flag) {
943     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
944     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
945     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
946     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr);
947     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr);
948     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices,pcbddc->local_primal_size);CHKERRQ(ierr);
949     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"pcbddc->n_vertices = %d, pcbddc->n_constraints = %d\n",pcbddc->n_vertices,pcbddc->n_constraints);CHKERRQ(ierr);
950     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
951   }
952 
953   /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
954   ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
955   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
956   ierr = PetscMalloc((pcis->n_B-n_vertices)*sizeof(PetscInt),&aux_array2);CHKERRQ(ierr);
957   ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
958   for (i=0; i<n_D; i++) {
959     ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr);
960   }
961   ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
962   for (i=0, j=0; i<n_R; i++) {
963     if (!PetscBTLookup(bitmask,idx_R_local[i])) {
964       aux_array1[j++] = i;
965     }
966   }
967   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
968   ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
969   for (i=0, j=0; i<n_B; i++) {
970     if (!PetscBTLookup(bitmask,is_indices[i])) {
971       aux_array2[j++] = i;
972     }
973   }
974   ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr);
975   ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr);
976   ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr);
977   ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
978   ierr = ISDestroy(&is_aux2);CHKERRQ(ierr);
979 
980   if (pcbddc->switch_static || pcbddc->dbg_flag) {
981     ierr = PetscMalloc(n_D*sizeof(PetscInt),&aux_array1);CHKERRQ(ierr);
982     for (i=0, j=0; i<n_R; i++) {
983       if (PetscBTLookup(bitmask,idx_R_local[i])) {
984         aux_array1[j++] = i;
985       }
986     }
987     ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr);
988     ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr);
989     ierr = ISDestroy(&is_aux1);CHKERRQ(ierr);
990   }
991   ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr);
992   ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "PCBDDCSetUpLocalSolvers"
999 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc)
1000 {
1001   PC_BDDC        *pcbddc = (PC_BDDC*)pc->data;
1002   PC_IS          *pcis = (PC_IS*)pc->data;
1003   PC             pc_temp;
1004   Mat            A_RR;
1005   MatStructure   matstruct;
1006   MatReuse       reuse;
1007   PetscScalar    m_one = -1.0;
1008   PetscReal      value;
1009   PetscInt       n_D,n_R,ibs,mbs;
1010   PetscBool      use_exact,use_exact_reduced,issbaij;
1011   PetscErrorCode ierr;
1012   /* prefixes stuff */
1013   char           dir_prefix[256],neu_prefix[256],str_level[3];
1014   size_t         len;
1015 
1016   PetscFunctionBegin;
1017   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
1018 
1019   /* compute prefixes */
1020   ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr);
1021   ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr);
1022   if (!pcbddc->current_level) {
1023     ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1024     ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
1025     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1026     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1027   } else {
1028     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
1029     sprintf(str_level,"%d_",(int)(pcbddc->current_level));
1030     ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
1031     len -= 15; /* remove "pc_bddc_coarse_" */
1032     if (pcbddc->current_level>1) len -= 2; /* remove "X_" with X level number (works with 9 levels max) */
1033     ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1034     ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
1035     *(dir_prefix+len)='\0';
1036     *(neu_prefix+len)='\0';
1037     ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr);
1038     ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr);
1039     ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr);
1040     ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr);
1041   }
1042 
1043   /* DIRICHLET PROBLEM */
1044   /* Matrix for Dirichlet problem is pcis->A_II */
1045   ierr = ISGetSize(pcis->is_I_local,&n_D);CHKERRQ(ierr);
1046   if (!pcbddc->ksp_D) { /* create object if not yet build */
1047     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr);
1048     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr);
1049     /* default */
1050     ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr);
1051     ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr);
1052     ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1053     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1054     if (issbaij) {
1055       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1056     } else {
1057       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1058     }
1059     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1060   }
1061   ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II,matstruct);CHKERRQ(ierr);
1062   /* Allow user's customization */
1063   ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr);
1064   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1065   if (!n_D) {
1066     ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr);
1067     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1068   }
1069   /* Set Up KSP for Dirichlet problem of BDDC */
1070   ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr);
1071   /* set ksp_D into pcis data */
1072   ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr);
1073   ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr);
1074   pcis->ksp_D = pcbddc->ksp_D;
1075 
1076   /* NEUMANN PROBLEM */
1077   /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
1078   ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr);
1079   if (pcbddc->ksp_R) { /* already created ksp */
1080     PetscInt nn_R;
1081     ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR,NULL);CHKERRQ(ierr);
1082     ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr);
1083     ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr);
1084     if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
1085       ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr);
1086       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1087       reuse = MAT_INITIAL_MATRIX;
1088     } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
1089       if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
1090         ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1091         reuse = MAT_INITIAL_MATRIX;
1092       } else { /* safe to reuse the matrix */
1093         reuse = MAT_REUSE_MATRIX;
1094       }
1095     }
1096     /* last check */
1097     if (matstruct == DIFFERENT_NONZERO_PATTERN) {
1098       ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1099       reuse = MAT_INITIAL_MATRIX;
1100     }
1101   } else { /* first time, so we need to create the matrix */
1102     reuse = MAT_INITIAL_MATRIX;
1103   }
1104   /* extract A_RR */
1105   ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr);
1106   ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr);
1107   if (ibs != mbs) {
1108     Mat newmat;
1109     ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INITIAL_MATRIX,&newmat);CHKERRQ(ierr);
1110     ierr = MatGetSubMatrix(newmat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1111     ierr = MatDestroy(&newmat);CHKERRQ(ierr);
1112   } else {
1113     ierr = MatGetSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr);
1114   }
1115   if (!pcbddc->ksp_R) { /* create object if not present */
1116     ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr);
1117     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr);
1118     /* default */
1119     ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr);
1120     ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr);
1121     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1122     ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr);
1123     if (issbaij) {
1124       ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr);
1125     } else {
1126       ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr);
1127     }
1128     ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr);
1129   }
1130   ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR,matstruct);CHKERRQ(ierr);
1131   /* Allow user's customization */
1132   ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr);
1133   /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
1134   if (!n_R) {
1135     ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr);
1136     ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr);
1137   }
1138   /* Set Up KSP for Neumann problem of BDDC */
1139   ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr);
1140 
1141   /* check Dirichlet and Neumann solvers and adapt them if a nullspace correction is needed */
1142   if (pcbddc->NullSpace || pcbddc->dbg_flag) {
1143     /* Dirichlet */
1144     ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr);
1145     ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr);
1146     ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr);
1147     ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr);
1148     ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr);
1149     /* need to be adapted? */
1150     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1151     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1152     ierr = PCBDDCSetUseExactDirichlet(pc,use_exact_reduced);CHKERRQ(ierr);
1153     /* print info */
1154     if (pcbddc->dbg_flag) {
1155       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1156       ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1157       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
1158       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Checking solution of Dirichlet and Neumann problems\n");CHKERRQ(ierr);
1159       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr);
1160       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1161     }
1162     if (pcbddc->NullSpace && !use_exact_reduced && !pcbddc->switch_static) {
1163       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcis->is_I_local);CHKERRQ(ierr);
1164     }
1165 
1166     /* Neumann */
1167     ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr);
1168     ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1169     ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1170     ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr);
1171     ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr);
1172     /* need to be adapted? */
1173     use_exact = (PetscAbsReal(value) > 1.e-4 ? PETSC_FALSE : PETSC_TRUE);
1174     ierr = MPI_Allreduce(&use_exact,&use_exact_reduced,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
1175     /* print info */
1176     if (pcbddc->dbg_flag) {
1177       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr);
1178       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1179     }
1180     if (pcbddc->NullSpace && !use_exact_reduced) { /* is it the right logic? */
1181       ierr = PCBDDCNullSpaceAssembleCorrection(pc,pcbddc->is_R_local);CHKERRQ(ierr);
1182     }
1183   }
1184   /* free Neumann problem's matrix */
1185   ierr = MatDestroy(&A_RR);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "PCBDDCSolveSaddlePoint"
1191 static PetscErrorCode  PCBDDCSolveSaddlePoint(PC pc)
1192 {
1193   PetscErrorCode ierr;
1194   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1195 
1196   PetscFunctionBegin;
1197   ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr);
1198   if (pcbddc->local_auxmat1) {
1199     ierr = MatMult(pcbddc->local_auxmat1,pcbddc->vec2_R,pcbddc->vec1_C);CHKERRQ(ierr);
1200     ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr);
1201   }
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "PCBDDCApplyInterfacePreconditioner"
1207 PetscErrorCode  PCBDDCApplyInterfacePreconditioner(PC pc)
1208 {
1209   PetscErrorCode ierr;
1210   PC_BDDC*        pcbddc = (PC_BDDC*)(pc->data);
1211   PC_IS*            pcis = (PC_IS*)  (pc->data);
1212   const PetscScalar zero = 0.0;
1213 
1214   PetscFunctionBegin;
1215   /* Application of PHI^T (or PSI^T)  */
1216   if (pcbddc->coarse_psi_B) {
1217     ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1218     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1219   } else {
1220     ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr);
1221     if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); }
1222   }
1223   /* Scatter data of coarse_rhs */
1224   if (pcbddc->coarse_rhs) { ierr = VecSet(pcbddc->coarse_rhs,zero);CHKERRQ(ierr); }
1225   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226 
1227   /* Local solution on R nodes */
1228   ierr = VecSet(pcbddc->vec1_R,zero);CHKERRQ(ierr);
1229   ierr = VecScatterBegin(pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   ierr = VecScatterEnd  (pcbddc->R_to_B,pcis->vec1_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   if (pcbddc->switch_static) {
1232     ierr = VecScatterBegin(pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1233     ierr = VecScatterEnd  (pcbddc->R_to_D,pcis->vec1_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   }
1235   ierr = PCBDDCSolveSaddlePoint(pc);CHKERRQ(ierr);
1236   ierr = VecSet(pcis->vec1_B,zero);CHKERRQ(ierr);
1237   ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238   ierr = VecScatterEnd  (pcbddc->R_to_B,pcbddc->vec2_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239   if (pcbddc->switch_static) {
1240     ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1241     ierr = VecScatterEnd  (pcbddc->R_to_D,pcbddc->vec2_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1242   }
1243 
1244   /* Coarse solution */
1245   ierr = PCBDDCScatterCoarseDataEnd(pc,pcbddc->vec1_P,pcbddc->coarse_rhs,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246   if (pcbddc->coarse_rhs) { /* TODO remove null space when doing multilevel */
1247     ierr = KSPSolve(pcbddc->coarse_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
1248   }
1249   ierr = PCBDDCScatterCoarseDataBegin(pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1250   ierr = PCBDDCScatterCoarseDataEnd  (pc,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1251 
1252   /* Sum contributions from two levels */
1253   ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr);
1254   if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "PCBDDCScatterCoarseDataBegin"
1260 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1261 {
1262   PetscErrorCode ierr;
1263   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "PCBDDCScatterCoarseDataEnd"
1272 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc,Vec vec_from, Vec vec_to, InsertMode imode, ScatterMode smode)
1273 {
1274   PetscErrorCode ierr;
1275   PC_BDDC*       pcbddc = (PC_BDDC*)(pc->data);
1276 
1277   PetscFunctionBegin;
1278   ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,vec_from,vec_to,imode,smode);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 /* uncomment for testing purposes */
1283 /* #define PETSC_MISSING_LAPACK_GESVD 1 */
1284 #undef __FUNCT__
1285 #define __FUNCT__ "PCBDDCConstraintsSetUp"
1286 PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
1287 {
1288   PetscErrorCode    ierr;
1289   PC_IS*            pcis = (PC_IS*)(pc->data);
1290   PC_BDDC*          pcbddc = (PC_BDDC*)pc->data;
1291   Mat_IS*           matis = (Mat_IS*)pc->pmat->data;
1292   /* constraint and (optionally) change of basis matrix implemented as SeqAIJ */
1293   MatType           impMatType=MATSEQAIJ;
1294   /* one and zero */
1295   PetscScalar       one=1.0,zero=0.0;
1296   /* space to store constraints and their local indices */
1297   PetscScalar       *temp_quadrature_constraint;
1298   PetscInt          *temp_indices,*temp_indices_to_constraint,*temp_indices_to_constraint_B;
1299   /* iterators */
1300   PetscInt          i,j,k,total_counts,temp_start_ptr;
1301   /* stuff to store connected components stored in pcbddc->mat_graph */
1302   IS                ISForVertices,*ISForFaces,*ISForEdges,*used_IS;
1303   PetscInt          n_ISForFaces,n_ISForEdges;
1304   /* near null space stuff */
1305   MatNullSpace      nearnullsp;
1306   const Vec         *nearnullvecs;
1307   Vec               *localnearnullsp;
1308   PetscBool         nnsp_has_cnst;
1309   PetscInt          nnsp_size;
1310   PetscScalar       *array;
1311   /* BLAS integers */
1312   PetscBLASInt      lwork,lierr;
1313   PetscBLASInt      Blas_N,Blas_M,Blas_K,Blas_one=1;
1314   PetscBLASInt      Blas_LDA,Blas_LDB,Blas_LDC;
1315   /* LAPACK working arrays for SVD or POD */
1316   PetscBool         skip_lapack;
1317   PetscScalar       *work;
1318   PetscReal         *singular_vals;
1319 #if defined(PETSC_USE_COMPLEX)
1320   PetscReal         *rwork;
1321 #endif
1322 #if defined(PETSC_MISSING_LAPACK_GESVD)
1323   PetscBLASInt      Blas_one_2=1;
1324   PetscScalar       *temp_basis,*correlation_mat;
1325 #else
1326   PetscBLASInt      dummy_int_1=1,dummy_int_2=1;
1327   PetscScalar       dummy_scalar_1=0.0,dummy_scalar_2=0.0;
1328 #endif
1329   /* reuse */
1330   PetscInt          olocal_primal_size;
1331   PetscInt          *oprimal_indices_local_idxs;
1332   /* change of basis */
1333   PetscInt          *aux_primal_numbering,*aux_primal_minloc,*global_indices;
1334   PetscBool         boolforchange,qr_needed;
1335   PetscBT           touched,change_basis,qr_needed_idx;
1336   /* auxiliary stuff */
1337   PetscInt          *nnz,*is_indices,*aux_primal_numbering_B;
1338   /* some quantities */
1339   PetscInt          n_vertices,total_primal_vertices,valid_constraints;
1340   PetscInt          size_of_constraint,max_size_of_constraint,max_constraints,temp_constraints;
1341 
1342 
1343   PetscFunctionBegin;
1344   /* Destroy Mat objects computed previously */
1345   ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1346   ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1347   /* Get index sets for faces, edges and vertices from graph */
1348   if (!pcbddc->use_faces && !pcbddc->use_edges && !pcbddc->use_vertices) {
1349     pcbddc->use_vertices = PETSC_TRUE;
1350   }
1351   ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,pcbddc->use_faces,pcbddc->use_edges,pcbddc->use_vertices,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
1352   /* HACK: provide functions to set change of basis */
1353   if (!ISForVertices && pcbddc->NullSpace) {
1354     pcbddc->use_change_of_basis = PETSC_TRUE;
1355     pcbddc->use_change_on_faces = PETSC_FALSE;
1356   }
1357   /* print some info */
1358   if (pcbddc->dbg_flag) {
1359     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
1360     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1361     i = 0;
1362     if (ISForVertices) {
1363       ierr = ISGetSize(ISForVertices,&i);CHKERRQ(ierr);
1364     }
1365     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices\n",PetscGlobalRank,i);CHKERRQ(ierr);
1366     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges\n",PetscGlobalRank,n_ISForEdges);CHKERRQ(ierr);
1367     ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces\n",PetscGlobalRank,n_ISForFaces);CHKERRQ(ierr);
1368     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
1369   }
1370   /* check if near null space is attached to global mat */
1371   ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr);
1372   if (nearnullsp) {
1373     ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr);
1374     /* remove any stored info */
1375     ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr);
1376     ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1377     /* store information for BDDC solver reuse */
1378     ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr);
1379     pcbddc->onearnullspace = nearnullsp;
1380     ierr = PetscMalloc(nnsp_size*sizeof(PetscObjectState),&pcbddc->onearnullvecs_state);CHKERRQ(ierr);
1381     for (i=0;i<nnsp_size;i++) {
1382       ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr);
1383     }
1384   } else { /* if near null space is not provided BDDC uses constants by default */
1385     nnsp_size = 0;
1386     nnsp_has_cnst = PETSC_TRUE;
1387   }
1388   /* get max number of constraints on a single cc */
1389   max_constraints = nnsp_size;
1390   if (nnsp_has_cnst) max_constraints++;
1391 
1392   /*
1393        Evaluate maximum storage size needed by the procedure
1394        - temp_indices will contain start index of each constraint stored as follows
1395        - temp_indices_to_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in local numbering) on which the constraint acts
1396        - temp_indices_to_constraint_B[temp_indices[i],...,temp[indices[i+1]-1] will contain the indices (in boundary numbering) on which the constraint acts
1397        - temp_quadrature_constraint  [temp_indices[i],...,temp[indices[i+1]-1] will contain the scalars representing the constraint itself
1398                                                                                                                                                          */
1399   total_counts = n_ISForFaces+n_ISForEdges;
1400   total_counts *= max_constraints;
1401   n_vertices = 0;
1402   if (ISForVertices) {
1403     ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr);
1404   }
1405   total_counts += n_vertices;
1406   ierr = PetscMalloc((total_counts+1)*sizeof(PetscInt),&temp_indices);CHKERRQ(ierr);
1407   ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr);
1408   total_counts = 0;
1409   max_size_of_constraint = 0;
1410   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1411     if (i<n_ISForEdges) {
1412       used_IS = &ISForEdges[i];
1413     } else {
1414       used_IS = &ISForFaces[i-n_ISForEdges];
1415     }
1416     ierr = ISGetSize(*used_IS,&j);CHKERRQ(ierr);
1417     total_counts += j;
1418     max_size_of_constraint = PetscMax(j,max_size_of_constraint);
1419   }
1420   total_counts *= max_constraints;
1421   total_counts += n_vertices;
1422   ierr = PetscMalloc(total_counts*sizeof(PetscScalar),&temp_quadrature_constraint);CHKERRQ(ierr);
1423   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint);CHKERRQ(ierr);
1424   ierr = PetscMalloc(total_counts*sizeof(PetscInt),&temp_indices_to_constraint_B);CHKERRQ(ierr);
1425   /* get local part of global near null space vectors */
1426   ierr = PetscMalloc(nnsp_size*sizeof(Vec),&localnearnullsp);CHKERRQ(ierr);
1427   for (k=0;k<nnsp_size;k++) {
1428     ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr);
1429     ierr = VecScatterBegin(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1430     ierr = VecScatterEnd(matis->ctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1431   }
1432 
1433   /* whether or not to skip lapack calls */
1434   skip_lapack = PETSC_TRUE;
1435   if (n_ISForFaces+n_ISForEdges) skip_lapack = PETSC_FALSE;
1436 
1437   /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
1438   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1439     PetscScalar temp_work;
1440 #if defined(PETSC_MISSING_LAPACK_GESVD)
1441     /* Proper Orthogonal Decomposition (POD) using the snapshot method */
1442     ierr = PetscMalloc(max_constraints*max_constraints*sizeof(PetscScalar),&correlation_mat);CHKERRQ(ierr);
1443     ierr = PetscMalloc(max_constraints*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1444     ierr = PetscMalloc(max_size_of_constraint*max_constraints*sizeof(PetscScalar),&temp_basis);CHKERRQ(ierr);
1445 #if defined(PETSC_USE_COMPLEX)
1446     ierr = PetscMalloc(3*max_constraints*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1447 #endif
1448     /* now we evaluate the optimal workspace using query with lwork=-1 */
1449     ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1450     ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr);
1451     lwork = -1;
1452     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1453 #if !defined(PETSC_USE_COMPLEX)
1454     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
1455 #else
1456     PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
1457 #endif
1458     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1459     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
1460 #else /* on missing GESVD */
1461     /* SVD */
1462     PetscInt max_n,min_n;
1463     max_n = max_size_of_constraint;
1464     min_n = max_constraints;
1465     if (max_size_of_constraint < max_constraints) {
1466       min_n = max_size_of_constraint;
1467       max_n = max_constraints;
1468     }
1469     ierr = PetscMalloc(min_n*sizeof(PetscReal),&singular_vals);CHKERRQ(ierr);
1470 #if defined(PETSC_USE_COMPLEX)
1471     ierr = PetscMalloc(5*min_n*sizeof(PetscReal),&rwork);CHKERRQ(ierr);
1472 #endif
1473     /* now we evaluate the optimal workspace using query with lwork=-1 */
1474     lwork = -1;
1475     ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr);
1476     ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr);
1477     ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr);
1478     ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1479 #if !defined(PETSC_USE_COMPLEX)
1480     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,&lierr));
1481 #else
1482     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[0],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,&temp_work,&lwork,rwork,&lierr));
1483 #endif
1484     ierr = PetscFPTrapPop();CHKERRQ(ierr);
1485     if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
1486 #endif /* on missing GESVD */
1487     /* Allocate optimal workspace */
1488     ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr);
1489     ierr = PetscMalloc((PetscInt)lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
1490   }
1491   /* Now we can loop on constraining sets */
1492   total_counts = 0;
1493   temp_indices[0] = 0;
1494   /* vertices */
1495   if (ISForVertices) {
1496     ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1497     if (nnsp_has_cnst) { /* consider all vertices */
1498       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr);
1499       for (i=0;i<n_vertices;i++) {
1500         temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1501         temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1502         total_counts++;
1503       }
1504     } else { /* consider vertices for which exist at least a localnearnullsp which is not null there */
1505       PetscBool used_vertex;
1506       for (i=0;i<n_vertices;i++) {
1507         used_vertex = PETSC_FALSE;
1508         k = 0;
1509         while (!used_vertex && k<nnsp_size) {
1510           ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1511           if (PetscAbsScalar(array[is_indices[i]])>0.0) {
1512             temp_indices_to_constraint[temp_indices[total_counts]]=is_indices[i];
1513             temp_quadrature_constraint[temp_indices[total_counts]]=1.0;
1514             temp_indices[total_counts+1]=temp_indices[total_counts]+1;
1515             total_counts++;
1516             used_vertex = PETSC_TRUE;
1517           }
1518           ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1519           k++;
1520         }
1521       }
1522     }
1523     ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1524     n_vertices = total_counts;
1525   }
1526 
1527   /* edges and faces */
1528   for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
1529     if (i<n_ISForEdges) {
1530       used_IS = &ISForEdges[i];
1531       boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
1532     } else {
1533       used_IS = &ISForFaces[i-n_ISForEdges];
1534       boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
1535     }
1536     temp_constraints = 0;          /* zero the number of constraints I have on this conn comp */
1537     temp_start_ptr = total_counts; /* need to know the starting index of constraints stored */
1538     ierr = ISGetSize(*used_IS,&size_of_constraint);CHKERRQ(ierr);
1539     ierr = ISGetIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1540     /* change of basis should not be performed on local periodic nodes */
1541     if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
1542     if (nnsp_has_cnst) {
1543       PetscScalar quad_value;
1544       temp_constraints++;
1545       quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
1546       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1547       for (j=0;j<size_of_constraint;j++) {
1548         temp_quadrature_constraint[temp_indices[total_counts]+j]=quad_value;
1549       }
1550       temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1551       total_counts++;
1552     }
1553     for (k=0;k<nnsp_size;k++) {
1554       PetscReal real_value;
1555       ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1556       ierr = PetscMemcpy(&temp_indices_to_constraint[temp_indices[total_counts]],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr);
1557       for (j=0;j<size_of_constraint;j++) {
1558         temp_quadrature_constraint[temp_indices[total_counts]+j]=array[is_indices[j]];
1559       }
1560       ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr);
1561       /* check if array is null on the connected component */
1562       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1563       PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_one));
1564       if (real_value > 0.0) { /* keep indices and values */
1565         temp_constraints++;
1566         temp_indices[total_counts+1]=temp_indices[total_counts]+size_of_constraint;  /* store new starting point */
1567         total_counts++;
1568       }
1569     }
1570     ierr = ISRestoreIndices(*used_IS,(const PetscInt**)&is_indices);CHKERRQ(ierr);
1571     valid_constraints = temp_constraints;
1572     /* perform SVD on the constraints if use_nnsp_true has not be requested by the user and there are non-null constraints on the cc */
1573     if (!pcbddc->use_nnsp_true && temp_constraints) {
1574       PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */
1575 
1576 #if defined(PETSC_MISSING_LAPACK_GESVD)
1577       /* SVD: Y = U*S*V^H                -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
1578          POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
1579          -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
1580             the constraints basis will differ (by a complex factor with absolute value equal to 1)
1581             from that computed using LAPACKgesvd
1582          -> This is due to a different computation of eigenvectors in LAPACKheev
1583          -> The quality of the POD-computed basis will be the same */
1584       ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr);
1585       /* Store upper triangular part of correlation matrix */
1586       ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1587       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1588       for (j=0;j<temp_constraints;j++) {
1589         for (k=0;k<j+1;k++) {
1590           PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k]=BLASdot_(&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr+k]],&Blas_one,&temp_quadrature_constraint[temp_indices[temp_start_ptr+j]],&Blas_one_2));
1591         }
1592       }
1593       /* compute eigenvalues and eigenvectors of correlation matrix */
1594       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1595       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr);
1596 #if !defined(PETSC_USE_COMPLEX)
1597       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
1598 #else
1599       PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
1600 #endif
1601       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1602       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
1603       /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
1604       j=0;
1605       while (j < temp_constraints && singular_vals[j] < tol) j++;
1606       total_counts=total_counts-j;
1607       valid_constraints = temp_constraints-j;
1608       /* scale and copy POD basis into used quadrature memory */
1609       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1610       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1611       ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr);
1612       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1613       ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr);
1614       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1615       if (j<temp_constraints) {
1616         PetscInt ii;
1617         for (k=j;k<temp_constraints;k++) singular_vals[k]=1.0/PetscSqrtReal(singular_vals[k]);
1618         ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1619         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
1620         ierr = PetscFPTrapPop();CHKERRQ(ierr);
1621         for (k=0;k<temp_constraints-j;k++) {
1622           for (ii=0;ii<size_of_constraint;ii++) {
1623             temp_quadrature_constraint[temp_indices[temp_start_ptr+k]+ii]=singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
1624           }
1625         }
1626       }
1627 #else  /* on missing GESVD */
1628       ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1629       ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr);
1630       ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1631       ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1632 #if !defined(PETSC_USE_COMPLEX)
1633       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,&lierr));
1634 #else
1635       PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&temp_quadrature_constraint[temp_indices[temp_start_ptr]],&Blas_LDA,singular_vals,&dummy_scalar_1,&dummy_int_1,&dummy_scalar_2,&dummy_int_2,work,&lwork,rwork,&lierr));
1636 #endif
1637       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
1638       ierr = PetscFPTrapPop();CHKERRQ(ierr);
1639       /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
1640       k = temp_constraints;
1641       if (k > size_of_constraint) k = size_of_constraint;
1642       j = 0;
1643       while (j < k && singular_vals[k-j-1] < tol) j++;
1644       total_counts = total_counts-temp_constraints+k-j;
1645       valid_constraints = k-j;
1646 #endif /* on missing GESVD */
1647     }
1648     /* setting change_of_basis flag is safe now */
1649     if (boolforchange) {
1650       for (j=0;j<valid_constraints;j++) {
1651         PetscBTSet(change_basis,total_counts-j-1);
1652       }
1653     }
1654   }
1655   /* free index sets of faces, edges and vertices */
1656   for (i=0;i<n_ISForFaces;i++) {
1657     ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr);
1658   }
1659   ierr = PetscFree(ISForFaces);CHKERRQ(ierr);
1660   for (i=0;i<n_ISForEdges;i++) {
1661     ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr);
1662   }
1663   ierr = PetscFree(ISForEdges);CHKERRQ(ierr);
1664   ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr);
1665   /* map temp_indices_to_constraint in boundary numbering */
1666   ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,temp_indices[total_counts],temp_indices_to_constraint,&i,temp_indices_to_constraint_B);CHKERRQ(ierr);
1667   if (i != temp_indices[total_counts]) {
1668     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for constraints indices %d != %d\n",temp_indices[total_counts],i);
1669   }
1670 
1671   /* free workspace */
1672   if (!pcbddc->use_nnsp_true && !skip_lapack) {
1673     ierr = PetscFree(work);CHKERRQ(ierr);
1674 #if defined(PETSC_USE_COMPLEX)
1675     ierr = PetscFree(rwork);CHKERRQ(ierr);
1676 #endif
1677     ierr = PetscFree(singular_vals);CHKERRQ(ierr);
1678 #if defined(PETSC_MISSING_LAPACK_GESVD)
1679     ierr = PetscFree(correlation_mat);CHKERRQ(ierr);
1680     ierr = PetscFree(temp_basis);CHKERRQ(ierr);
1681 #endif
1682   }
1683   for (k=0;k<nnsp_size;k++) {
1684     ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr);
1685   }
1686   ierr = PetscFree(localnearnullsp);CHKERRQ(ierr);
1687 
1688   /* set quantities in pcbddc data structure and store previous primal size */
1689   /* n_vertices defines the number of subdomain corners in the primal space */
1690   /* n_constraints defines the number of averages (they can be point primal dofs if change of basis is requested) */
1691   olocal_primal_size = pcbddc->local_primal_size;
1692   pcbddc->local_primal_size = total_counts;
1693   pcbddc->n_vertices = n_vertices;
1694   pcbddc->n_constraints = pcbddc->local_primal_size-pcbddc->n_vertices;
1695 
1696   /* Create constraint matrix */
1697   /* The constraint matrix is used to compute the l2g map of primal dofs */
1698   /* so we need to set it up properly either with or without change of basis */
1699   ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr);
1700   ierr = MatSetType(pcbddc->ConstraintMatrix,impMatType);CHKERRQ(ierr);
1701   ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr);
1702   /* array to compute a local numbering of constraints : vertices first then constraints */
1703   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_numbering);CHKERRQ(ierr);
1704   /* array to select the proper local node (of minimum index with respect to global ordering) when changing the basis */
1705   /* note: it should not be needed since IS for faces and edges are already sorted by global ordering when analyzing the graph but... just in case */
1706   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&aux_primal_minloc);CHKERRQ(ierr);
1707   /* auxiliary stuff for basis change */
1708   ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&global_indices);CHKERRQ(ierr);
1709   ierr = PetscBTCreate(pcis->n_B,&touched);CHKERRQ(ierr);
1710 
1711   /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
1712   total_primal_vertices=0;
1713   for (i=0;i<pcbddc->local_primal_size;i++) {
1714     size_of_constraint=temp_indices[i+1]-temp_indices[i];
1715     if (size_of_constraint == 1) {
1716       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]]);CHKERRQ(ierr);
1717       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]];
1718       aux_primal_minloc[total_primal_vertices]=0;
1719       total_primal_vertices++;
1720     } else if (PetscBTLookup(change_basis,i)) { /* Same procedure used in PCBDDCGetPrimalConstraintsLocalIdx */
1721       PetscInt min_loc,min_index;
1722       ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],global_indices);CHKERRQ(ierr);
1723       /* find first untouched local node */
1724       k = 0;
1725       while (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) k++;
1726       min_index = global_indices[k];
1727       min_loc = k;
1728       /* search the minimum among global nodes already untouched on the cc */
1729       for (k=1;k<size_of_constraint;k++) {
1730         /* there can be more than one constraint on a single connected component */
1731         if (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k]) && min_index > global_indices[k]) {
1732           min_index = global_indices[k];
1733           min_loc = k;
1734         }
1735       }
1736       ierr = PetscBTSet(touched,temp_indices_to_constraint_B[temp_indices[i]+min_loc]);CHKERRQ(ierr);
1737       aux_primal_numbering[total_primal_vertices]=temp_indices_to_constraint[temp_indices[i]+min_loc];
1738       aux_primal_minloc[total_primal_vertices]=min_loc;
1739       total_primal_vertices++;
1740     }
1741   }
1742   /* determine if a QR strategy is needed for change of basis */
1743   qr_needed = PETSC_FALSE;
1744   ierr = PetscBTCreate(pcbddc->local_primal_size,&qr_needed_idx);CHKERRQ(ierr);
1745   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1746     if (PetscBTLookup(change_basis,i)) {
1747       size_of_constraint = temp_indices[i+1]-temp_indices[i];
1748       j = 0;
1749       for (k=0;k<size_of_constraint;k++) {
1750         if (PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+k])) {
1751           j++;
1752         }
1753       }
1754       /* found more than one primal dof on the cc */
1755       if (j > 1) {
1756         PetscBTSet(qr_needed_idx,i);
1757         qr_needed = PETSC_TRUE;
1758       }
1759     }
1760   }
1761   /* free workspace */
1762   ierr = PetscFree(global_indices);CHKERRQ(ierr);
1763 
1764   /* permute indices in order to have a sorted set of vertices */
1765   ierr = PetscSortInt(total_primal_vertices,aux_primal_numbering);CHKERRQ(ierr);
1766 
1767   /* nonzero structure of constraint matrix */
1768   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1769   for (i=0;i<total_primal_vertices;i++) nnz[i]=1;
1770   j=total_primal_vertices;
1771   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1772     if (!PetscBTLookup(change_basis,i)) {
1773       nnz[j]=temp_indices[i+1]-temp_indices[i];
1774       j++;
1775     }
1776   }
1777   ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr);
1778   ierr = PetscFree(nnz);CHKERRQ(ierr);
1779   /* set values in constraint matrix */
1780   for (i=0;i<total_primal_vertices;i++) {
1781     ierr = MatSetValue(pcbddc->ConstraintMatrix,i,aux_primal_numbering[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
1782   }
1783   total_counts = total_primal_vertices;
1784   for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1785     if (!PetscBTLookup(change_basis,i)) {
1786       size_of_constraint=temp_indices[i+1]-temp_indices[i];
1787       ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&total_counts,size_of_constraint,&temp_indices_to_constraint[temp_indices[i]],&temp_quadrature_constraint[temp_indices[i]],INSERT_VALUES);CHKERRQ(ierr);
1788       total_counts++;
1789     }
1790   }
1791   /* assembling */
1792   ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   /*
1795   ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
1796   ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr);
1797   */
1798   /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
1799   if (pcbddc->use_change_of_basis) {
1800     /* dual and primal dofs on a single cc */
1801     PetscInt     dual_dofs,primal_dofs;
1802     /* iterator on aux_primal_minloc (ordered as read from nearnullspace: vertices, edges and then constraints) */
1803     PetscInt     primal_counter;
1804     /* working stuff for GEQRF */
1805     PetscScalar  *qr_basis,*qr_tau,*qr_work,lqr_work_t;
1806     PetscBLASInt lqr_work;
1807     /* working stuff for UNGQR */
1808     PetscScalar  *gqr_work,lgqr_work_t;
1809     PetscBLASInt lgqr_work;
1810     /* working stuff for TRTRS */
1811     PetscScalar  *trs_rhs;
1812     PetscBLASInt Blas_NRHS;
1813     /* pointers for values insertion into change of basis matrix */
1814     PetscInt     *start_rows,*start_cols;
1815     PetscScalar  *start_vals;
1816     /* working stuff for values insertion */
1817     PetscBT      is_primal;
1818 
1819     /* change of basis acts on local interfaces -> dimension is n_B x n_B */
1820     ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr);
1821     ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,impMatType);CHKERRQ(ierr);
1822     ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,pcis->n_B,pcis->n_B,pcis->n_B,pcis->n_B);CHKERRQ(ierr);
1823     /* work arrays */
1824     ierr = PetscMalloc(pcis->n_B*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
1825     for (i=0;i<pcis->n_B;i++) nnz[i]=1;
1826     /* nonzeros per row */
1827     for (i=pcbddc->n_vertices;i<pcbddc->local_primal_size;i++) {
1828       if (PetscBTLookup(change_basis,i)) {
1829         size_of_constraint = temp_indices[i+1]-temp_indices[i];
1830         if (PetscBTLookup(qr_needed_idx,i)) {
1831           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1832         } else {
1833           for (j=0;j<size_of_constraint;j++) nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = 2;
1834           /* get local primal index on the cc */
1835           j = 0;
1836           while (!PetscBTLookup(touched,temp_indices_to_constraint_B[temp_indices[i]+j])) j++;
1837           nnz[temp_indices_to_constraint_B[temp_indices[i]+j]] = size_of_constraint;
1838         }
1839       }
1840     }
1841     ierr = MatSeqAIJSetPreallocation(pcbddc->ChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr);
1842     ierr = PetscFree(nnz);CHKERRQ(ierr);
1843     /* Set initial identity in the matrix */
1844     for (i=0;i<pcis->n_B;i++) {
1845       ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr);
1846     }
1847 
1848     if (pcbddc->dbg_flag) {
1849       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr);
1850       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
1851     }
1852 
1853 
1854     /* Now we loop on the constraints which need a change of basis */
1855     /*
1856        Change of basis matrix is evaluated similarly to the FIRST APPROACH in
1857        Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
1858 
1859        Basic blocks of change of basis matrix T computed
1860 
1861           - Using the following block transformation if there is only a primal dof on the cc
1862             (in the example, primal dof is the last one of the edge in LOCAL ordering
1863              in this code, primal dof is the first one of the edge in GLOBAL ordering)
1864             | 1        0   ...        0     1 |
1865             | 0        1   ...        0     1 |
1866             |              ...                |
1867             | 0        ...            1     1 |
1868             | -s_1/s_n ...    -s_{n-1}/-s_n 1 |
1869 
1870           - via QR decomposition of constraints otherwise
1871     */
1872     if (qr_needed) {
1873       /* space to store Q */
1874       ierr = PetscMalloc((max_size_of_constraint)*(max_size_of_constraint)*sizeof(PetscScalar),&qr_basis);CHKERRQ(ierr);
1875       /* first we issue queries for optimal work */
1876       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1877       ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr);
1878       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1879       lqr_work = -1;
1880       PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
1881       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
1882       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr);
1883       ierr = PetscMalloc((PetscInt)PetscRealPart(lqr_work_t)*sizeof(*qr_work),&qr_work);CHKERRQ(ierr);
1884       lgqr_work = -1;
1885       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr);
1886       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr);
1887       ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr);
1888       ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1889       if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
1890       PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
1891       if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr);
1892       ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr);
1893       ierr = PetscMalloc((PetscInt)PetscRealPart(lgqr_work_t)*sizeof(*gqr_work),&gqr_work);CHKERRQ(ierr);
1894       /* array to store scaling factors for reflectors */
1895       ierr = PetscMalloc(max_constraints*sizeof(*qr_tau),&qr_tau);CHKERRQ(ierr);
1896       /* array to store rhs and solution of triangular solver */
1897       ierr = PetscMalloc(max_constraints*max_constraints*sizeof(*trs_rhs),&trs_rhs);CHKERRQ(ierr);
1898       /* allocating workspace for check */
1899       if (pcbddc->dbg_flag) {
1900         ierr = PetscMalloc(max_size_of_constraint*(max_constraints+max_size_of_constraint)*sizeof(*work),&work);CHKERRQ(ierr);
1901       }
1902     }
1903     /* array to store whether a node is primal or not */
1904     ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr);
1905     ierr = PetscMalloc(total_primal_vertices*sizeof(PetscInt),&aux_primal_numbering_B);CHKERRQ(ierr);
1906     ierr = ISGlobalToLocalMappingApply(pcbddc->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,aux_primal_numbering,&i,aux_primal_numbering_B);CHKERRQ(ierr);
1907     if (i != total_primal_vertices) {
1908       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Error in boundary numbering for BDDC vertices! %d != %d\n",total_primal_vertices,i);
1909     }
1910     for (i=0;i<total_primal_vertices;i++) {
1911       ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr);
1912     }
1913     ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr);
1914 
1915     /* loop on constraints and see whether or not they need a change of basis and compute it */
1916     /* -> using implicit ordering contained in temp_indices data */
1917     total_counts = pcbddc->n_vertices;
1918     primal_counter = total_counts;
1919     while (total_counts<pcbddc->local_primal_size) {
1920       primal_dofs = 1;
1921       if (PetscBTLookup(change_basis,total_counts)) {
1922         /* get all constraints with same support: if more then one constraint is present on the cc then surely indices are stored contiguosly */
1923         while (total_counts+primal_dofs < pcbddc->local_primal_size && temp_indices_to_constraint_B[temp_indices[total_counts]] == temp_indices_to_constraint_B[temp_indices[total_counts+primal_dofs]]) {
1924           primal_dofs++;
1925         }
1926         /* get constraint info */
1927         size_of_constraint = temp_indices[total_counts+1]-temp_indices[total_counts];
1928         dual_dofs = size_of_constraint-primal_dofs;
1929 
1930         if (pcbddc->dbg_flag) {
1931           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d to %d (incl) need a change of basis (size %d)\n",total_counts,total_counts+primal_dofs-1,size_of_constraint);CHKERRQ(ierr);
1932         }
1933 
1934         if (primal_dofs > 1) { /* QR */
1935 
1936           /* copy quadrature constraints for change of basis check */
1937           if (pcbddc->dbg_flag) {
1938             ierr = PetscMemcpy(work,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1939           }
1940           /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
1941           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1942 
1943           /* compute QR decomposition of constraints */
1944           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1945           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1946           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1947           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1948           PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
1949           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
1950           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1951 
1952           /* explictly compute R^-T */
1953           ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr);
1954           for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
1955           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1956           ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr);
1957           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1958           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1959           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1960           PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
1961           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
1962           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1963 
1964           /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */
1965           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1966           ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
1967           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1968           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1969           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1970           PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
1971           if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr);
1972           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1973 
1974           /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
1975              i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
1976              where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
1977           ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr);
1978           ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr);
1979           ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr);
1980           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
1981           ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr);
1982           ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr);
1983           ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
1984           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,&temp_quadrature_constraint[temp_indices[total_counts]],&Blas_LDC));
1985           ierr = PetscFPTrapPop();CHKERRQ(ierr);
1986           ierr = PetscMemcpy(qr_basis,&temp_quadrature_constraint[temp_indices[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr);
1987 
1988           /* insert values in change of basis matrix respecting global ordering of new primal dofs */
1989           start_rows = &temp_indices_to_constraint_B[temp_indices[total_counts]];
1990           /* insert cols for primal dofs */
1991           for (j=0;j<primal_dofs;j++) {
1992             start_vals = &qr_basis[j*size_of_constraint];
1993             start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter+j]];
1994             ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
1995           }
1996           /* insert cols for dual dofs */
1997           for (j=0,k=0;j<dual_dofs;k++) {
1998             if (!PetscBTLookup(is_primal,temp_indices_to_constraint_B[temp_indices[total_counts]+k])) {
1999               start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
2000               start_cols = &temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2001               ierr = MatSetValues(pcbddc->ChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr);
2002               j++;
2003             }
2004           }
2005 
2006           /* check change of basis */
2007           if (pcbddc->dbg_flag) {
2008             PetscInt   ii,jj;
2009             PetscBool valid_qr=PETSC_TRUE;
2010             ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr);
2011             ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr);
2012             ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr);
2013             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr);
2014             ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr);
2015             ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr);
2016             ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
2017             PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&work[size_of_constraint*primal_dofs],&Blas_LDC));
2018             ierr = PetscFPTrapPop();CHKERRQ(ierr);
2019             for (jj=0;jj<size_of_constraint;jj++) {
2020               for (ii=0;ii<primal_dofs;ii++) {
2021                 if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
2022                 if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE;
2023               }
2024             }
2025             if (!valid_qr) {
2026               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2027               for (jj=0;jj<size_of_constraint;jj++) {
2028                 for (ii=0;ii<primal_dofs;ii++) {
2029                   if (ii != jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
2030                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2031                   }
2032                   if (ii == jj && PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) {
2033                     PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
2034                   }
2035                 }
2036               }
2037             } else {
2038               ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n",PetscGlobalRank);CHKERRQ(ierr);
2039             }
2040           }
2041         } else { /* simple transformation block */
2042           PetscInt row,col;
2043           PetscScalar val;
2044           for (j=0;j<size_of_constraint;j++) {
2045             row = temp_indices_to_constraint_B[temp_indices[total_counts]+j];
2046             if (!PetscBTLookup(is_primal,row)) {
2047               col = temp_indices_to_constraint_B[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2048               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr);
2049               ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,1.0,INSERT_VALUES);CHKERRQ(ierr);
2050             } else {
2051               for (k=0;k<size_of_constraint;k++) {
2052                 col = temp_indices_to_constraint_B[temp_indices[total_counts]+k];
2053                 if (row != col) {
2054                   val = -temp_quadrature_constraint[temp_indices[total_counts]+k]/temp_quadrature_constraint[temp_indices[total_counts]+aux_primal_minloc[primal_counter]];
2055                 } else {
2056                   val = 1.0;
2057                 }
2058                 ierr = MatSetValue(pcbddc->ChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr);
2059               }
2060             }
2061           }
2062         }
2063         /* increment primal counter */
2064         primal_counter += primal_dofs;
2065       } else {
2066         if (pcbddc->dbg_flag) {
2067           ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,temp_indices[total_counts+1]-temp_indices[total_counts]);CHKERRQ(ierr);
2068         }
2069       }
2070       /* increment constraint counter total_counts */
2071       total_counts += primal_dofs;
2072     }
2073 
2074     /* free workspace */
2075     if (qr_needed) {
2076       if (pcbddc->dbg_flag) {
2077         ierr = PetscFree(work);CHKERRQ(ierr);
2078       }
2079       ierr = PetscFree(trs_rhs);CHKERRQ(ierr);
2080       ierr = PetscFree(qr_tau);CHKERRQ(ierr);
2081       ierr = PetscFree(qr_work);CHKERRQ(ierr);
2082       ierr = PetscFree(gqr_work);CHKERRQ(ierr);
2083       ierr = PetscFree(qr_basis);CHKERRQ(ierr);
2084     }
2085     ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr);
2086     /* assembling */
2087     ierr = MatAssemblyBegin(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088     ierr = MatAssemblyEnd(pcbddc->ChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089     /*
2090     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr);
2091     ierr = MatView(pcbddc->ChangeOfBasisMatrix,(PetscViewer)0);CHKERRQ(ierr);
2092     */
2093   }
2094 
2095   /* get indices in local ordering for vertices and constraints */
2096   if (olocal_primal_size == pcbddc->local_primal_size) { /* if this is true, I need to check if a new primal space has been introduced */
2097     ierr = PetscMalloc(olocal_primal_size*sizeof(PetscInt),&oprimal_indices_local_idxs);CHKERRQ(ierr);
2098     ierr = PetscMemcpy(oprimal_indices_local_idxs,pcbddc->primal_indices_local_idxs,olocal_primal_size*sizeof(PetscInt));CHKERRQ(ierr);
2099   }
2100   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2101   ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr);
2102   ierr = PCBDDCGetPrimalVerticesLocalIdx(pc,&i,&aux_primal_numbering);CHKERRQ(ierr);
2103   ierr = PetscMemcpy(pcbddc->primal_indices_local_idxs,aux_primal_numbering,i*sizeof(PetscInt));CHKERRQ(ierr);
2104   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2105   ierr = PCBDDCGetPrimalConstraintsLocalIdx(pc,&j,&aux_primal_numbering);CHKERRQ(ierr);
2106   ierr = PetscMemcpy(&pcbddc->primal_indices_local_idxs[i],aux_primal_numbering,j*sizeof(PetscInt));CHKERRQ(ierr);
2107   ierr = PetscFree(aux_primal_numbering);CHKERRQ(ierr);
2108   /* set quantities in PCBDDC data struct */
2109   pcbddc->n_actual_vertices = i;
2110   /* check if a new primal space has been introduced */
2111   pcbddc->new_primal_space_local = PETSC_TRUE;
2112   if (olocal_primal_size == pcbddc->local_primal_size) {
2113     ierr = PetscMemcmp(pcbddc->primal_indices_local_idxs,oprimal_indices_local_idxs,olocal_primal_size,&pcbddc->new_primal_space_local);CHKERRQ(ierr);
2114     pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
2115     ierr = PetscFree(oprimal_indices_local_idxs);CHKERRQ(ierr);
2116   }
2117   /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
2118   ierr = MPI_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
2119 
2120   /* flush dbg viewer */
2121   if (pcbddc->dbg_flag) {
2122     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
2123   }
2124 
2125   /* free workspace */
2126   ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2127   ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr);
2128   ierr = PetscFree(aux_primal_minloc);CHKERRQ(ierr);
2129   ierr = PetscFree(temp_indices);CHKERRQ(ierr);
2130   ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr);
2131   ierr = PetscFree(temp_indices_to_constraint);CHKERRQ(ierr);
2132   ierr = PetscFree(temp_indices_to_constraint_B);CHKERRQ(ierr);
2133   ierr = PetscFree(temp_quadrature_constraint);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "PCBDDCAnalyzeInterface"
2139 PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
2140 {
2141   PC_BDDC     *pcbddc = (PC_BDDC*)pc->data;
2142   PC_IS       *pcis = (PC_IS*)pc->data;
2143   Mat_IS      *matis  = (Mat_IS*)pc->pmat->data;
2144   PetscInt    ierr,i,vertex_size;
2145   PetscViewer viewer=pcbddc->dbg_viewer;
2146 
2147   PetscFunctionBegin;
2148   /* Reset previously computed graph */
2149   ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr);
2150   /* Init local Graph struct */
2151   ierr = PCBDDCGraphInit(pcbddc->mat_graph,matis->mapping);CHKERRQ(ierr);
2152 
2153   /* Check validity of the csr graph passed in by the user */
2154   if (pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) {
2155     ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr);
2156   }
2157 
2158   /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
2159   if (!pcbddc->mat_graph->xadj || !pcbddc->mat_graph->adjncy) {
2160     Mat mat_adj;
2161     const PetscInt *xadj,*adjncy;
2162     PetscBool flg_row=PETSC_TRUE;
2163 
2164     ierr = MatConvert(matis->A,MATMPIADJ,MAT_INITIAL_MATRIX,&mat_adj);CHKERRQ(ierr);
2165     ierr = MatGetRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2166     if (!flg_row) {
2167       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatGetRowIJ called in %s\n",__FUNCT__);
2168     }
2169     ierr = PCBDDCSetLocalAdjacencyGraph(pc,i,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr);
2170     ierr = MatRestoreRowIJ(mat_adj,0,PETSC_TRUE,PETSC_FALSE,&i,&xadj,&adjncy,&flg_row);CHKERRQ(ierr);
2171     if (!flg_row) {
2172       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in MatRestoreRowIJ called in %s\n",__FUNCT__);
2173     }
2174     ierr = MatDestroy(&mat_adj);CHKERRQ(ierr);
2175   }
2176 
2177   /* Set default dofs' splitting if no information has been provided by the user with PCBDDCSetDofsSplitting or PCBDDCSetDofsSplittingLocal */
2178   vertex_size = 1;
2179   if (pcbddc->user_provided_isfordofs) {
2180     if (pcbddc->n_ISForDofs) { /* need to convert from global to local and remove references to global dofs splitting */
2181       ierr = PetscMalloc(pcbddc->n_ISForDofs*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2182       for (i=0;i<pcbddc->n_ISForDofs;i++) {
2183         ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2184         ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr);
2185       }
2186       pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
2187       pcbddc->n_ISForDofs = 0;
2188       ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr);
2189     }
2190     /* mat block size as vertex size (used for elasticity with rigid body modes as nearnullspace) */
2191     ierr = MatGetBlockSize(matis->A,&vertex_size);CHKERRQ(ierr);
2192   } else {
2193     if (!pcbddc->n_ISForDofsLocal) { /* field split not present, create it in local ordering */
2194       ierr = MatGetBlockSize(pc->pmat,&pcbddc->n_ISForDofsLocal);CHKERRQ(ierr);
2195       ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&pcbddc->ISForDofsLocal);CHKERRQ(ierr);
2196       for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
2197         ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),pcis->n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr);
2198       }
2199     }
2200   }
2201 
2202   /* Setup of Graph */
2203   if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { /* need to convert from global to local */
2204     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr);
2205   }
2206   if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { /* need to convert from global to local */
2207     ierr = PCBDDCGlobalToLocal(pc,matis->ctx,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr);
2208   }
2209   ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices);
2210 
2211   /* Graph's connected components analysis */
2212   ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr);
2213 
2214   /* print some info to stdout */
2215   if (pcbddc->dbg_flag) {
2216     ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,viewer);
2217   }
2218 
2219   /* mark topography has done */
2220   pcbddc->recompute_topography = PETSC_FALSE;
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "PCBDDCGetPrimalVerticesLocalIdx"
2226 PetscErrorCode  PCBDDCGetPrimalVerticesLocalIdx(PC pc, PetscInt *n_vertices, PetscInt **vertices_idx)
2227 {
2228   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2229   PetscInt       *vertices,*row_cmat_indices,n,i,size_of_constraint,local_primal_size;
2230   PetscErrorCode ierr;
2231 
2232   PetscFunctionBegin;
2233   n = 0;
2234   vertices = 0;
2235   if (pcbddc->ConstraintMatrix) {
2236     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&i);CHKERRQ(ierr);
2237     for (i=0;i<local_primal_size;i++) {
2238       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2239       if (size_of_constraint == 1) n++;
2240       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2241     }
2242     if (vertices_idx) {
2243       ierr = PetscMalloc(n*sizeof(PetscInt),&vertices);CHKERRQ(ierr);
2244       n = 0;
2245       for (i=0;i<local_primal_size;i++) {
2246         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2247         if (size_of_constraint == 1) {
2248           vertices[n++]=row_cmat_indices[0];
2249         }
2250         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2251       }
2252     }
2253   }
2254   *n_vertices = n;
2255   if (vertices_idx) *vertices_idx = vertices;
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "PCBDDCGetPrimalConstraintsLocalIdx"
2261 PetscErrorCode  PCBDDCGetPrimalConstraintsLocalIdx(PC pc, PetscInt *n_constraints, PetscInt **constraints_idx)
2262 {
2263   PC_BDDC        *pcbddc = (PC_BDDC*)(pc->data);
2264   PetscInt       *constraints_index,*row_cmat_indices,*row_cmat_global_indices;
2265   PetscInt       n,i,j,size_of_constraint,local_primal_size,local_size,max_size_of_constraint,min_index,min_loc;
2266   PetscBT        touched;
2267   PetscErrorCode ierr;
2268 
2269     /* This function assumes that the number of local constraints per connected component
2270        is not greater than the number of nodes defined for the connected component
2271        (otherwise we will surely have linear dependence between constraints and thus a singular coarse problem) */
2272   PetscFunctionBegin;
2273   n = 0;
2274   constraints_index = 0;
2275   if (pcbddc->ConstraintMatrix) {
2276     ierr = MatGetSize(pcbddc->ConstraintMatrix,&local_primal_size,&local_size);CHKERRQ(ierr);
2277     max_size_of_constraint = 0;
2278     for (i=0;i<local_primal_size;i++) {
2279       ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2280       if (size_of_constraint > 1) {
2281         n++;
2282       }
2283       max_size_of_constraint = PetscMax(size_of_constraint,max_size_of_constraint);
2284       ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,NULL,NULL);CHKERRQ(ierr);
2285     }
2286     if (constraints_idx) {
2287       ierr = PetscMalloc(n*sizeof(PetscInt),&constraints_index);CHKERRQ(ierr);
2288       ierr = PetscMalloc(max_size_of_constraint*sizeof(PetscInt),&row_cmat_global_indices);CHKERRQ(ierr);
2289       ierr = PetscBTCreate(local_size,&touched);CHKERRQ(ierr);
2290       n = 0;
2291       for (i=0;i<local_primal_size;i++) {
2292         ierr = MatGetRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2293         if (size_of_constraint > 1) {
2294           ierr = ISLocalToGlobalMappingApply(pcbddc->mat_graph->l2gmap,size_of_constraint,row_cmat_indices,row_cmat_global_indices);CHKERRQ(ierr);
2295           /* find first untouched local node */
2296           j = 0;
2297           while (PetscBTLookup(touched,row_cmat_indices[j])) j++;
2298           min_index = row_cmat_global_indices[j];
2299           min_loc = j;
2300           /* search the minimum among nodes not yet touched on the connected component
2301              since there can be more than one constraint on a single cc */
2302           for (j=1;j<size_of_constraint;j++) {
2303             if (!PetscBTLookup(touched,row_cmat_indices[j]) && min_index > row_cmat_global_indices[j]) {
2304               min_index = row_cmat_global_indices[j];
2305               min_loc = j;
2306             }
2307           }
2308           ierr = PetscBTSet(touched,row_cmat_indices[min_loc]);CHKERRQ(ierr);
2309           constraints_index[n++] = row_cmat_indices[min_loc];
2310         }
2311         ierr = MatRestoreRow(pcbddc->ConstraintMatrix,i,&size_of_constraint,(const PetscInt**)&row_cmat_indices,NULL);CHKERRQ(ierr);
2312       }
2313       ierr = PetscBTDestroy(&touched);CHKERRQ(ierr);
2314       ierr = PetscFree(row_cmat_global_indices);CHKERRQ(ierr);
2315     }
2316   }
2317   *n_constraints = n;
2318   if (constraints_idx) *constraints_idx = constraints_index;
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 /* the next two functions has been adapted from pcis.c */
2323 #undef __FUNCT__
2324 #define __FUNCT__ "PCBDDCApplySchur"
2325 PetscErrorCode  PCBDDCApplySchur(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2326 {
2327   PetscErrorCode ierr;
2328   PC_IS          *pcis = (PC_IS*)(pc->data);
2329 
2330   PetscFunctionBegin;
2331   if (!vec2_B) { vec2_B = v; }
2332   ierr = MatMult(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2333   ierr = MatMult(pcis->A_IB,v,vec1_D);CHKERRQ(ierr);
2334   ierr = KSPSolve(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2335   ierr = MatMult(pcis->A_BI,vec2_D,vec2_B);CHKERRQ(ierr);
2336   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "PCBDDCApplySchurTranspose"
2342 PetscErrorCode  PCBDDCApplySchurTranspose(PC pc, Vec v, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
2343 {
2344   PetscErrorCode ierr;
2345   PC_IS          *pcis = (PC_IS*)(pc->data);
2346 
2347   PetscFunctionBegin;
2348   if (!vec2_B) { vec2_B = v; }
2349   ierr = MatMultTranspose(pcis->A_BB,v,vec1_B);CHKERRQ(ierr);
2350   ierr = MatMultTranspose(pcis->A_BI,v,vec1_D);CHKERRQ(ierr);
2351   ierr = KSPSolveTranspose(pcis->ksp_D,vec1_D,vec2_D);CHKERRQ(ierr);
2352   ierr = MatMultTranspose(pcis->A_IB,vec2_D,vec2_B);CHKERRQ(ierr);
2353   ierr = VecAXPY(vec1_B,-1.0,vec2_B);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "PCBDDCSubsetNumbering"
2359 PetscErrorCode PCBDDCSubsetNumbering(MPI_Comm comm,ISLocalToGlobalMapping l2gmap, PetscInt n_local_dofs, PetscInt local_dofs[], PetscInt local_dofs_mult[], PetscInt* n_global_subset, PetscInt* global_numbering_subset[])
2360 {
2361   Vec            local_vec,global_vec;
2362   IS             seqis,paris;
2363   VecScatter     scatter_ctx;
2364   PetscScalar    *array;
2365   PetscInt       *temp_global_dofs;
2366   PetscScalar    globalsum;
2367   PetscInt       i,j,s;
2368   PetscInt       nlocals,first_index,old_index,max_local;
2369   PetscMPIInt    rank_prec_comm,size_prec_comm,max_global;
2370   PetscMPIInt    *dof_sizes,*dof_displs;
2371   PetscBool      first_found;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   /* mpi buffers */
2376   MPI_Comm_size(comm,&size_prec_comm);
2377   MPI_Comm_rank(comm,&rank_prec_comm);
2378   j = ( !rank_prec_comm ? size_prec_comm : 0);
2379   ierr = PetscMalloc(j*sizeof(*dof_sizes),&dof_sizes);CHKERRQ(ierr);
2380   ierr = PetscMalloc(j*sizeof(*dof_displs),&dof_displs);CHKERRQ(ierr);
2381   /* get maximum size of subset */
2382   ierr = PetscMalloc(n_local_dofs*sizeof(PetscInt),&temp_global_dofs);CHKERRQ(ierr);
2383   ierr = ISLocalToGlobalMappingApply(l2gmap,n_local_dofs,local_dofs,temp_global_dofs);CHKERRQ(ierr);
2384   max_local = 0;
2385   if (n_local_dofs) {
2386     max_local = temp_global_dofs[0];
2387     for (i=1;i<n_local_dofs;i++) {
2388       if (max_local < temp_global_dofs[i] ) {
2389         max_local = temp_global_dofs[i];
2390       }
2391     }
2392   }
2393   ierr = MPI_Allreduce(&max_local,&max_global,1,MPIU_INT,MPI_MAX,comm);
2394   max_global++;
2395   max_local = 0;
2396   if (n_local_dofs) {
2397     max_local = local_dofs[0];
2398     for (i=1;i<n_local_dofs;i++) {
2399       if (max_local < local_dofs[i] ) {
2400         max_local = local_dofs[i];
2401       }
2402     }
2403   }
2404   max_local++;
2405   /* allocate workspace */
2406   ierr = VecCreate(PETSC_COMM_SELF,&local_vec);CHKERRQ(ierr);
2407   ierr = VecSetSizes(local_vec,PETSC_DECIDE,max_local);CHKERRQ(ierr);
2408   ierr = VecSetType(local_vec,VECSEQ);CHKERRQ(ierr);
2409   ierr = VecCreate(comm,&global_vec);CHKERRQ(ierr);
2410   ierr = VecSetSizes(global_vec,PETSC_DECIDE,max_global);CHKERRQ(ierr);
2411   ierr = VecSetType(global_vec,VECMPI);CHKERRQ(ierr);
2412   /* create scatter */
2413   ierr = ISCreateGeneral(PETSC_COMM_SELF,n_local_dofs,local_dofs,PETSC_COPY_VALUES,&seqis);CHKERRQ(ierr);
2414   ierr = ISCreateGeneral(comm,n_local_dofs,temp_global_dofs,PETSC_COPY_VALUES,&paris);CHKERRQ(ierr);
2415   ierr = VecScatterCreate(local_vec,seqis,global_vec,paris,&scatter_ctx);CHKERRQ(ierr);
2416   ierr = ISDestroy(&seqis);CHKERRQ(ierr);
2417   ierr = ISDestroy(&paris);CHKERRQ(ierr);
2418   /* init array */
2419   ierr = VecSet(global_vec,0.0);CHKERRQ(ierr);
2420   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2421   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2422   if (local_dofs_mult) {
2423     for (i=0;i<n_local_dofs;i++) {
2424       array[local_dofs[i]]=(PetscScalar)local_dofs_mult[i];
2425     }
2426   } else {
2427     for (i=0;i<n_local_dofs;i++) {
2428       array[local_dofs[i]]=1.0;
2429     }
2430   }
2431   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2432   /* scatter into global vec and get total number of global dofs */
2433   ierr = VecScatterBegin(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2434   ierr = VecScatterEnd(scatter_ctx,local_vec,global_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2435   ierr = VecSum(global_vec,&globalsum);CHKERRQ(ierr);
2436   *n_global_subset = (PetscInt)PetscRealPart(globalsum);
2437   /* Fill global_vec with cumulative function for global numbering */
2438   ierr = VecGetArray(global_vec,&array);CHKERRQ(ierr);
2439   ierr = VecGetLocalSize(global_vec,&s);CHKERRQ(ierr);
2440   nlocals = 0;
2441   first_index = -1;
2442   first_found = PETSC_FALSE;
2443   for (i=0;i<s;i++) {
2444     if (!first_found && PetscRealPart(array[i]) > 0.0) {
2445       first_found = PETSC_TRUE;
2446       first_index = i;
2447     }
2448     nlocals += (PetscInt)PetscRealPart(array[i]);
2449   }
2450   ierr = MPI_Gather(&nlocals,1,MPIU_INT,dof_sizes,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2451   if (!rank_prec_comm) {
2452     dof_displs[0]=0;
2453     for (i=1;i<size_prec_comm;i++) {
2454       dof_displs[i] = dof_displs[i-1]+dof_sizes[i-1];
2455     }
2456   }
2457   ierr = MPI_Scatter(dof_displs,1,MPIU_INT,&nlocals,1,MPIU_INT,0,comm);CHKERRQ(ierr);
2458   if (first_found) {
2459     array[first_index] += (PetscScalar)nlocals;
2460     old_index = first_index;
2461     for (i=first_index+1;i<s;i++) {
2462       if (PetscRealPart(array[i]) > 0.0) {
2463         array[i] += array[old_index];
2464         old_index = i;
2465       }
2466     }
2467   }
2468   ierr = VecRestoreArray(global_vec,&array);CHKERRQ(ierr);
2469   ierr = VecSet(local_vec,0.0);CHKERRQ(ierr);
2470   ierr = VecScatterBegin(scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2471   ierr = VecScatterEnd  (scatter_ctx,global_vec,local_vec,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2472   /* get global ordering of local dofs */
2473   ierr = VecGetArray(local_vec,&array);CHKERRQ(ierr);
2474   if (local_dofs_mult) {
2475     for (i=0;i<n_local_dofs;i++) {
2476       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-local_dofs_mult[i];
2477     }
2478   } else {
2479     for (i=0;i<n_local_dofs;i++) {
2480       temp_global_dofs[i] = (PetscInt)PetscRealPart(array[local_dofs[i]])-1;
2481     }
2482   }
2483   ierr = VecRestoreArray(local_vec,&array);CHKERRQ(ierr);
2484   /* free workspace */
2485   ierr = VecScatterDestroy(&scatter_ctx);CHKERRQ(ierr);
2486   ierr = VecDestroy(&local_vec);CHKERRQ(ierr);
2487   ierr = VecDestroy(&global_vec);CHKERRQ(ierr);
2488   ierr = PetscFree(dof_sizes);CHKERRQ(ierr);
2489   ierr = PetscFree(dof_displs);CHKERRQ(ierr);
2490   /* return pointer to global ordering of local dofs */
2491   *global_numbering_subset = temp_global_dofs;
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "PCBDDCOrthonormalizeVecs"
2497 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[])
2498 {
2499   PetscInt       i,j;
2500   PetscScalar    *alphas;
2501   PetscErrorCode ierr;
2502 
2503   PetscFunctionBegin;
2504   /* this implements stabilized Gram-Schmidt */
2505   ierr = PetscMalloc(n*sizeof(PetscScalar),&alphas);CHKERRQ(ierr);
2506   for (i=0;i<n;i++) {
2507     ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr);
2508     if (i<n) { ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],&alphas[i+1]);CHKERRQ(ierr); }
2509     for (j=i+1;j<n;j++) { ierr = VecAXPY(vecs[j],PetscConj(-alphas[j]),vecs[i]);CHKERRQ(ierr); }
2510   }
2511   ierr = PetscFree(alphas);CHKERRQ(ierr);
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 /* TODO
2516    - now preallocation is done assuming SEQDENSE local matrices
2517 */
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatISGetMPIXAIJ"
2520 static PetscErrorCode MatISGetMPIXAIJ(Mat mat, MatType Mtype, MatReuse reuse, Mat *M)
2521 {
2522   Mat                    new_mat;
2523   Mat_IS                 *matis = (Mat_IS*)(mat->data);
2524   /* info on mat */
2525   /* ISLocalToGlobalMapping rmapping,cmapping; */
2526   PetscInt               bs,rows,cols;
2527   PetscInt               lrows,lcols;
2528   PetscInt               local_rows,local_cols;
2529   PetscBool              isdense;
2530   /* values insertion */
2531   PetscScalar            *array;
2532   PetscInt               *local_indices,*global_indices;
2533   /* work */
2534   PetscInt               i,j,index_row;
2535   PetscErrorCode         ierr;
2536 
2537   PetscFunctionBegin;
2538   /* MISSING CHECKS
2539     - rectangular case not covered (it is not allowed by MATIS)
2540   */
2541   /* get info from mat */
2542   /* ierr = MatGetLocalToGlobalMapping(mat,&rmapping,&cmapping);CHKERRQ(ierr); */
2543   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
2544   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2545   ierr = MatGetSize(matis->A,&local_rows,&local_cols);CHKERRQ(ierr);
2546 
2547   /* work */
2548   ierr = PetscMalloc(local_rows*sizeof(*local_indices),&local_indices);CHKERRQ(ierr);
2549   for (i=0;i<local_rows;i++) local_indices[i]=i;
2550   /* map indices of local mat to global values */
2551   ierr = PetscMalloc(PetscMax(local_cols,local_rows)*sizeof(*global_indices),&global_indices);CHKERRQ(ierr);
2552   /* ierr = ISLocalToGlobalMappingApply(rmapping,local_rows,local_indices,global_indices);CHKERRQ(ierr); */
2553   ierr = ISLocalToGlobalMappingApply(matis->mapping,local_rows,local_indices,global_indices);CHKERRQ(ierr);
2554 
2555   if (reuse==MAT_INITIAL_MATRIX) {
2556     Vec         vec_dnz,vec_onz;
2557     PetscScalar *my_dnz,*my_onz;
2558     PetscInt    *dnz,*onz,*mat_ranges,*row_ownership;
2559     PetscInt    index_col,owner;
2560     PetscMPIInt nsubdomains;
2561 
2562     ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&nsubdomains);CHKERRQ(ierr);
2563     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&new_mat);CHKERRQ(ierr);
2564     ierr = MatSetSizes(new_mat,PETSC_DECIDE,PETSC_DECIDE,rows,cols);CHKERRQ(ierr);
2565     ierr = MatSetBlockSize(new_mat,bs);CHKERRQ(ierr);
2566     ierr = MatSetType(new_mat,Mtype);CHKERRQ(ierr);
2567     ierr = MatSetUp(new_mat);CHKERRQ(ierr);
2568     ierr = MatGetLocalSize(new_mat,&lrows,&lcols);CHKERRQ(ierr);
2569 
2570     /*
2571       preallocation
2572     */
2573 
2574     ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)new_mat),lrows,lcols,dnz,onz);CHKERRQ(ierr);
2575     /*
2576        Some vectors are needed to sum up properly on shared interface dofs.
2577        Preallocation macros cannot do the job.
2578        Note that preallocation is not exact, since it overestimates nonzeros
2579     */
2580     ierr = MatGetVecs(new_mat,NULL,&vec_dnz);CHKERRQ(ierr);
2581     /* ierr = VecSetLocalToGlobalMapping(vec_dnz,rmapping);CHKERRQ(ierr); */
2582     ierr = VecSetLocalToGlobalMapping(vec_dnz,matis->mapping);CHKERRQ(ierr);
2583     ierr = VecDuplicate(vec_dnz,&vec_onz);CHKERRQ(ierr);
2584     /* All processes need to compute entire row ownership */
2585     ierr = PetscMalloc(rows*sizeof(*row_ownership),&row_ownership);CHKERRQ(ierr);
2586     ierr = MatGetOwnershipRanges(new_mat,(const PetscInt**)&mat_ranges);CHKERRQ(ierr);
2587     for (i=0;i<nsubdomains;i++) {
2588       for (j=mat_ranges[i];j<mat_ranges[i+1];j++) {
2589         row_ownership[j]=i;
2590       }
2591     }
2592 
2593     /*
2594        my_dnz and my_onz contains exact contribution to preallocation from each local mat
2595        then, they will be summed up properly. This way, preallocation is always sufficient
2596     */
2597     ierr = PetscMalloc(local_rows*sizeof(*my_dnz),&my_dnz);CHKERRQ(ierr);
2598     ierr = PetscMalloc(local_rows*sizeof(*my_onz),&my_onz);CHKERRQ(ierr);
2599     ierr = PetscMemzero(my_dnz,local_rows*sizeof(*my_dnz));CHKERRQ(ierr);
2600     ierr = PetscMemzero(my_onz,local_rows*sizeof(*my_onz));CHKERRQ(ierr);
2601     for (i=0;i<local_rows;i++) {
2602       index_row = global_indices[i];
2603       for (j=i;j<local_rows;j++) {
2604         owner = row_ownership[index_row];
2605         index_col = global_indices[j];
2606         if (index_col > mat_ranges[owner]-1 && index_col < mat_ranges[owner+1] ) { /* diag block */
2607           my_dnz[i] += 1.0;
2608         } else { /* offdiag block */
2609           my_onz[i] += 1.0;
2610         }
2611         /* same as before, interchanging rows and cols */
2612         if (i != j) {
2613           owner = row_ownership[index_col];
2614           if (index_row > mat_ranges[owner]-1 && index_row < mat_ranges[owner+1] ) {
2615             my_dnz[j] += 1.0;
2616           } else {
2617             my_onz[j] += 1.0;
2618           }
2619         }
2620       }
2621     }
2622     ierr = VecSet(vec_dnz,0.0);CHKERRQ(ierr);
2623     ierr = VecSet(vec_onz,0.0);CHKERRQ(ierr);
2624     if (local_rows) { /* multilevel guard */
2625       ierr = VecSetValuesLocal(vec_dnz,local_rows,local_indices,my_dnz,ADD_VALUES);CHKERRQ(ierr);
2626       ierr = VecSetValuesLocal(vec_onz,local_rows,local_indices,my_onz,ADD_VALUES);CHKERRQ(ierr);
2627     }
2628     ierr = VecAssemblyBegin(vec_dnz);CHKERRQ(ierr);
2629     ierr = VecAssemblyBegin(vec_onz);CHKERRQ(ierr);
2630     ierr = VecAssemblyEnd(vec_dnz);CHKERRQ(ierr);
2631     ierr = VecAssemblyEnd(vec_onz);CHKERRQ(ierr);
2632     ierr = PetscFree(my_dnz);CHKERRQ(ierr);
2633     ierr = PetscFree(my_onz);CHKERRQ(ierr);
2634     ierr = PetscFree(row_ownership);CHKERRQ(ierr);
2635 
2636     /* set computed preallocation in dnz and onz */
2637     ierr = VecGetArray(vec_dnz,&array);CHKERRQ(ierr);
2638     for (i=0; i<lrows; i++) dnz[i] = (PetscInt)PetscRealPart(array[i]);
2639     ierr = VecRestoreArray(vec_dnz,&array);CHKERRQ(ierr);
2640     ierr = VecGetArray(vec_onz,&array);CHKERRQ(ierr);
2641     for (i=0;i<lrows;i++) onz[i] = (PetscInt)PetscRealPart(array[i]);
2642     ierr = VecRestoreArray(vec_onz,&array);CHKERRQ(ierr);
2643     ierr = VecDestroy(&vec_dnz);CHKERRQ(ierr);
2644     ierr = VecDestroy(&vec_onz);CHKERRQ(ierr);
2645 
2646     /* Resize preallocation if overestimated */
2647     for (i=0;i<lrows;i++) {
2648       dnz[i] = PetscMin(dnz[i],lcols);
2649       onz[i] = PetscMin(onz[i],cols-lcols);
2650     }
2651     /* set preallocation */
2652     ierr = MatMPIAIJSetPreallocation(new_mat,0,dnz,0,onz);CHKERRQ(ierr);
2653     for (i=0;i<lrows/bs;i++) {
2654       dnz[i] = dnz[i*bs]/bs;
2655       onz[i] = onz[i*bs]/bs;
2656     }
2657     ierr = MatMPIBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2658     for (i=0;i<lrows/bs;i++) {
2659       dnz[i] = dnz[i]-i;
2660     }
2661     ierr = MatMPISBAIJSetPreallocation(new_mat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2662     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2663     *M = new_mat;
2664   } else {
2665     PetscInt mbs,mrows,mcols;
2666     /* some checks */
2667     ierr = MatGetBlockSize(*M,&mbs);CHKERRQ(ierr);
2668     ierr = MatGetSize(*M,&mrows,&mcols);CHKERRQ(ierr);
2669     if (mrows != rows) {
2670       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of rows (%d != %d)",rows,mrows);
2671     }
2672     if (mrows != rows) {
2673       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong number of cols (%d != %d)",cols,mcols);
2674     }
2675     if (mbs != bs) {
2676       SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix. Wrong block size (%d != %d)",bs,mbs);
2677     }
2678     ierr = MatZeroEntries(*M);CHKERRQ(ierr);
2679   }
2680   /* set local to global mappings */
2681   /* ierr = MatSetLocalToGlobalMapping(*M,rmapping,cmapping);CHKERRQ(ierr); */
2682   /* Set values */
2683   ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2684   if (isdense) { /* special case for dense local matrices */
2685     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2686     ierr = MatDenseGetArray(matis->A,&array);CHKERRQ(ierr);
2687     ierr = MatSetValues(*M,local_rows,global_indices,local_cols,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2688     ierr = MatDenseRestoreArray(matis->A,&array);CHKERRQ(ierr);
2689     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2690     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2691   } else { /* very basic values insertion for all other matrix types */
2692     ierr = PetscFree(local_indices);CHKERRQ(ierr);
2693     ierr = PetscFree(global_indices);CHKERRQ(ierr);
2694     for (i=0;i<local_rows;i++) {
2695       ierr = MatGetRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2696       /* ierr = MatSetValuesLocal(*M,1,&i,j,local_indices,array,ADD_VALUES);CHKERRQ(ierr); */
2697       ierr = ISLocalToGlobalMappingApply(matis->mapping,j,local_indices,global_indices);CHKERRQ(ierr);
2698       ierr = ISLocalToGlobalMappingApply(matis->mapping,1,&i,&index_row);CHKERRQ(ierr);
2699       ierr = MatSetValues(*M,1,&index_row,j,global_indices,array,ADD_VALUES);CHKERRQ(ierr);
2700       ierr = MatRestoreRow(matis->A,i,&j,(const PetscInt**)&local_indices,(const PetscScalar**)&array);CHKERRQ(ierr);
2701     }
2702   }
2703   ierr = MatAssemblyBegin(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2704   ierr = MatAssemblyEnd(*M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   if (isdense) {
2706     ierr = MatSetOption(*M,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
2707   }
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatISGetSubassemblingPattern"
2713 PetscErrorCode MatISGetSubassemblingPattern(Mat mat, PetscInt coarsening_ratio, IS* is_sends)
2714 {
2715   Mat             subdomain_adj;
2716   IS              new_ranks,ranks_send_to;
2717   MatPartitioning partitioner;
2718   Mat_IS          *matis;
2719   PetscInt        n_neighs,*neighs,*n_shared,**shared;
2720   PetscInt        prank;
2721   PetscMPIInt     size,rank,color;
2722   PetscInt        *xadj,*adjncy,*oldranks;
2723   PetscInt        *adjncy_wgt,*v_wgt,*is_indices,*ranks_send_to_idx;
2724   PetscInt        i,j,n_subdomains,local_size,threshold=0;
2725   PetscErrorCode  ierr;
2726   PetscBool       use_vwgt=PETSC_FALSE,use_square=PETSC_FALSE;
2727   PetscSubcomm    subcomm;
2728 
2729   PetscFunctionBegin;
2730   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_square",&use_square,NULL);CHKERRQ(ierr);
2731   ierr = PetscOptionsGetBool(NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr);
2732   ierr = PetscOptionsGetInt(NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr);
2733 
2734   /* Get info on mapping */
2735   matis = (Mat_IS*)(mat->data);
2736   ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&local_size);CHKERRQ(ierr);
2737   ierr = ISLocalToGlobalMappingGetInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2738 
2739   /* build local CSR graph of subdomains' connectivity */
2740   ierr = PetscMalloc(2*sizeof(*xadj),&xadj);CHKERRQ(ierr);
2741   xadj[0] = 0;
2742   xadj[1] = PetscMax(n_neighs-1,0);
2743   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy),&adjncy);CHKERRQ(ierr);
2744   ierr = PetscMalloc(xadj[1]*sizeof(*adjncy_wgt),&adjncy_wgt);CHKERRQ(ierr);
2745 
2746   if (threshold) {
2747     PetscInt* count,min_threshold;
2748     ierr = PetscMalloc(local_size*sizeof(PetscInt),&count);CHKERRQ(ierr);
2749     ierr = PetscMemzero(count,local_size*sizeof(PetscInt));CHKERRQ(ierr);
2750     for (i=1;i<n_neighs;i++) {/* i=1 so I don't count myself -> faces nodes counts to 1 */
2751       for (j=0;j<n_shared[i];j++) {
2752         count[shared[i][j]] += 1;
2753       }
2754     }
2755     /* adapt threshold since we dont want to lose significant connections */
2756     min_threshold = n_neighs;
2757     for (i=1;i<n_neighs;i++) {
2758       for (j=0;j<n_shared[i];j++) {
2759         min_threshold = PetscMin(count[shared[i][j]],min_threshold);
2760       }
2761     }
2762     threshold = PetscMax(min_threshold+1,threshold);
2763     xadj[1] = 0;
2764     for (i=1;i<n_neighs;i++) {
2765       for (j=0;j<n_shared[i];j++) {
2766         if (count[shared[i][j]] < threshold) {
2767           adjncy[xadj[1]] = neighs[i];
2768           adjncy_wgt[xadj[1]] = n_shared[i];
2769           xadj[1]++;
2770           break;
2771         }
2772       }
2773     }
2774     ierr = PetscFree(count);CHKERRQ(ierr);
2775   } else {
2776     if (xadj[1]) {
2777       ierr = PetscMemcpy(adjncy,&neighs[1],xadj[1]*sizeof(*adjncy));CHKERRQ(ierr);
2778       ierr = PetscMemcpy(adjncy_wgt,&n_shared[1],xadj[1]*sizeof(*adjncy_wgt));CHKERRQ(ierr);
2779     }
2780   }
2781   ierr = ISLocalToGlobalMappingRestoreInfo(matis->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr);
2782   if (use_square) {
2783     for (i=0;i<xadj[1];i++) {
2784       adjncy_wgt[i] = adjncy_wgt[i]*adjncy_wgt[i];
2785     }
2786   }
2787   ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2788 
2789   ierr = PetscMalloc(sizeof(PetscInt),&ranks_send_to_idx);CHKERRQ(ierr);
2790 
2791   /*
2792     Restrict work on active processes only.
2793   */
2794   ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&subcomm);CHKERRQ(ierr);
2795   ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */
2796   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
2797   ierr = PetscMPIIntCast(!local_size,&color);CHKERRQ(ierr);
2798   ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr);
2799   if (color) {
2800     ierr = PetscFree(xadj);CHKERRQ(ierr);
2801     ierr = PetscFree(adjncy);CHKERRQ(ierr);
2802     ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr);
2803   } else {
2804     ierr = MPI_Comm_size(subcomm->comm,&size);CHKERRQ(ierr);
2805     ierr = PetscMalloc(size*sizeof(*oldranks),&oldranks);CHKERRQ(ierr);
2806     prank = rank;
2807     ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm->comm);CHKERRQ(ierr);
2808     /*
2809     for (i=0;i<size;i++) {
2810       PetscPrintf(subcomm->comm,"oldranks[%d] = %d\n",i,oldranks[i]);
2811     }
2812     */
2813     for (i=0;i<xadj[1];i++) {
2814       ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr);
2815     }
2816     ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr);
2817     ierr = MatCreateMPIAdj(subcomm->comm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr);
2818     n_subdomains = (PetscInt)size;
2819     /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */
2820 
2821     /* Partition */
2822     ierr = MatPartitioningCreate(subcomm->comm,&partitioner);CHKERRQ(ierr);
2823     ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr);
2824     if (use_vwgt) {
2825       ierr = PetscMalloc(sizeof(*v_wgt),&v_wgt);CHKERRQ(ierr);
2826       v_wgt[0] = local_size;
2827       ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr);
2828     }
2829     /* ierr = PetscPrintf(PetscObjectComm((PetscObject)partitioner),"NPARTS %d\n",n_subdomains/coarsening_ratio);CHKERRQ(ierr); */
2830     ierr = MatPartitioningSetNParts(partitioner,n_subdomains/coarsening_ratio);CHKERRQ(ierr);
2831     ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr);
2832     ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr);
2833     /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */
2834 
2835     ierr = ISGetIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2836     /* ranks_send_to_idx[0] = oldranks[is_indices[0]]; */ /* contiguos set of processes */
2837     ranks_send_to_idx[0] = coarsening_ratio*oldranks[is_indices[0]]; /* scattered set of processes */
2838     ierr = ISRestoreIndices(new_ranks,(const PetscInt**)&is_indices);CHKERRQ(ierr);
2839     /* clean up */
2840     ierr = PetscFree(oldranks);CHKERRQ(ierr);
2841     ierr = ISDestroy(&new_ranks);CHKERRQ(ierr);
2842     ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr);
2843     ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr);
2844   }
2845   ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr);
2846 
2847   /* assemble parallel IS for sends */
2848   i = 1;
2849   if (color) i=0;
2850   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,&ranks_send_to);CHKERRQ(ierr);
2851 
2852   /* get back IS */
2853   *is_sends = ranks_send_to;
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
2858 
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatISSubassemble"
2861 PetscErrorCode MatISSubassemble(Mat mat, IS is_sends, PetscInt coarsening_ratio, MatReuse reuse, Mat *mat_n)
2862 {
2863   Mat                    local_mat;
2864   Mat_IS                 *matis;
2865   IS                     is_sends_internal;
2866   PetscInt               rows,cols;
2867   PetscInt               i,bs,buf_size_idxs,buf_size_vals;
2868   PetscBool              ismatis,isdense;
2869   ISLocalToGlobalMapping l2gmap;
2870   PetscInt*              l2gmap_indices;
2871   const PetscInt*        is_indices;
2872   MatType                new_local_type;
2873   MatTypePrivate         new_local_type_private;
2874   /* buffers */
2875   PetscInt               *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
2876   PetscScalar            *ptr_vals,*send_buffer_vals,*recv_buffer_vals;
2877   /* MPI */
2878   MPI_Comm               comm;
2879   PetscMPIInt            n_sends,n_recvs,commsize;
2880   PetscMPIInt            *iflags,*ilengths_idxs,*ilengths_vals;
2881   PetscMPIInt            *onodes,*olengths_idxs,*olengths_vals;
2882   PetscMPIInt            len,tag_idxs,tag_vals,source_dest;
2883   MPI_Request            *send_req_idxs,*send_req_vals,*recv_req_idxs,*recv_req_vals;
2884   PetscErrorCode         ierr;
2885 
2886   PetscFunctionBegin;
2887   /* checks */
2888   ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr);
2889   if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on an matrix object which is not of type MATIS",__FUNCT__);
2890   ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
2891   ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr);
2892   if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
2893   ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr);
2894   if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
2895   if (reuse == MAT_REUSE_MATRIX) {
2896     PetscInt mrows,mcols,mnrows,mncols;
2897     PetscBool ismatis;
2898     ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr);
2899     if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
2900     ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr);
2901     ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr);
2902     if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
2903     if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
2904   }
2905   ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr);
2906   PetscValidLogicalCollectiveInt(mat,bs,0);
2907   /* prepare IS for sending if not provided */
2908   if (!is_sends) {
2909     if (!coarsening_ratio) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a coarsening ratio");
2910     ierr = MatISGetSubassemblingPattern(mat,coarsening_ratio,&is_sends_internal);CHKERRQ(ierr);
2911   } else {
2912     ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr);
2913     is_sends_internal = is_sends;
2914   }
2915 
2916   /* get pointer of MATIS data */
2917   matis = (Mat_IS*)mat->data;
2918 
2919   /* get comm */
2920   comm = PetscObjectComm((PetscObject)mat);
2921 
2922   /* compute number of sends */
2923   ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr);
2924   ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr);
2925 
2926   /* compute number of receives */
2927   ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr);
2928   ierr = PetscMalloc(commsize*sizeof(*iflags),&iflags);CHKERRQ(ierr);
2929   ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr);
2930   ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
2931   for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
2932   ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr);
2933   ierr = PetscFree(iflags);CHKERRQ(ierr);
2934 
2935   /* prepare send/receive buffers */
2936   ierr = PetscMalloc(commsize*sizeof(*ilengths_idxs),&ilengths_idxs);CHKERRQ(ierr);
2937   ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr);
2938   ierr = PetscMalloc(commsize*sizeof(*ilengths_vals),&ilengths_vals);CHKERRQ(ierr);
2939   ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr);
2940 
2941   /* Get data from local mat */
2942   if (!isdense) {
2943     /* TODO: See below some guidelines on how to prepare the local buffers */
2944     /*
2945        send_buffer_vals should contain the raw values of the local matrix
2946        send_buffer_idxs should contain:
2947        - MatType_PRIVATE type
2948        - PetscInt        size_of_l2gmap
2949        - PetscInt        global_row_indices[size_of_l2gmap]
2950        - PetscInt        all_other_info_which_is_needed_to_compute_preallocation_and_set_values
2951     */
2952   } else {
2953     ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
2954     ierr = ISLocalToGlobalMappingGetSize(matis->mapping,&i);CHKERRQ(ierr);
2955     ierr = PetscMalloc((i+2)*sizeof(PetscInt),&send_buffer_idxs);CHKERRQ(ierr);
2956     send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
2957     send_buffer_idxs[1] = i;
2958     ierr = ISLocalToGlobalMappingGetIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2959     ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr);
2960     ierr = ISLocalToGlobalMappingRestoreIndices(matis->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr);
2961     ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr);
2962     for (i=0;i<n_sends;i++) {
2963       ilengths_vals[is_indices[i]] = len*len;
2964       ilengths_idxs[is_indices[i]] = len+2;
2965     }
2966   }
2967   ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr);
2968   buf_size_idxs = 0;
2969   buf_size_vals = 0;
2970   for (i=0;i<n_recvs;i++) {
2971     buf_size_idxs += (PetscInt)olengths_idxs[i];
2972     buf_size_vals += (PetscInt)olengths_vals[i];
2973   }
2974   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&recv_buffer_idxs);CHKERRQ(ierr);
2975   ierr = PetscMalloc(buf_size_vals*sizeof(PetscScalar),&recv_buffer_vals);CHKERRQ(ierr);
2976 
2977   /* get new tags for clean communications */
2978   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr);
2979   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr);
2980 
2981   /* allocate for requests */
2982   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_idxs);CHKERRQ(ierr);
2983   ierr = PetscMalloc(n_sends*sizeof(MPI_Request),&send_req_vals);CHKERRQ(ierr);
2984   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_idxs);CHKERRQ(ierr);
2985   ierr = PetscMalloc(n_recvs*sizeof(MPI_Request),&recv_req_vals);CHKERRQ(ierr);
2986 
2987   /* communications */
2988   ptr_idxs = recv_buffer_idxs;
2989   ptr_vals = recv_buffer_vals;
2990   for (i=0;i<n_recvs;i++) {
2991     source_dest = onodes[i];
2992     ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr);
2993     ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr);
2994     ptr_idxs += olengths_idxs[i];
2995     ptr_vals += olengths_vals[i];
2996   }
2997   for (i=0;i<n_sends;i++) {
2998     ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr);
2999     ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr);
3000     ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr);
3001   }
3002   ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr);
3003   ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr);
3004 
3005   /* assemble new l2g map */
3006   ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3007   ptr_idxs = recv_buffer_idxs;
3008   buf_size_idxs = 0;
3009   for (i=0;i<n_recvs;i++) {
3010     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3011     ptr_idxs += olengths_idxs[i];
3012   }
3013   ierr = PetscMalloc(buf_size_idxs*sizeof(PetscInt),&l2gmap_indices);CHKERRQ(ierr);
3014   ptr_idxs = recv_buffer_idxs;
3015   buf_size_idxs = 0;
3016   for (i=0;i<n_recvs;i++) {
3017     ierr = PetscMemcpy(&l2gmap_indices[buf_size_idxs],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr);
3018     buf_size_idxs += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
3019     ptr_idxs += olengths_idxs[i];
3020   }
3021   ierr = PetscSortRemoveDupsInt(&buf_size_idxs,l2gmap_indices);CHKERRQ(ierr);
3022   ierr = ISLocalToGlobalMappingCreate(comm,buf_size_idxs,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr);
3023   ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr);
3024 
3025   /* infer new local matrix type from received local matrices type */
3026   /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
3027   /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
3028   new_local_type_private = MATAIJ_PRIVATE;
3029   new_local_type = MATSEQAIJ;
3030   if (n_recvs) {
3031     new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
3032     ptr_idxs = recv_buffer_idxs;
3033     for (i=0;i<n_recvs;i++) {
3034       if ((PetscInt)new_local_type_private != *ptr_idxs) {
3035         new_local_type_private = MATAIJ_PRIVATE;
3036         break;
3037       }
3038       ptr_idxs += olengths_idxs[i];
3039     }
3040     switch (new_local_type_private) {
3041       case MATDENSE_PRIVATE: /* subassembling of dense matrices does not give a dense matrix! */
3042         new_local_type = MATSEQAIJ;
3043         bs = 1;
3044         break;
3045       case MATAIJ_PRIVATE:
3046         new_local_type = MATSEQAIJ;
3047         bs = 1;
3048         break;
3049       case MATBAIJ_PRIVATE:
3050         new_local_type = MATSEQBAIJ;
3051         break;
3052       case MATSBAIJ_PRIVATE:
3053         new_local_type = MATSEQSBAIJ;
3054         break;
3055       default:
3056         SETERRQ2(comm,PETSC_ERR_LIB,"Unkwown private type %d in %s",new_local_type_private,__FUNCT__);
3057         break;
3058     }
3059   }
3060 
3061   /* create MATIS object if needed */
3062   if (reuse == MAT_INITIAL_MATRIX) {
3063     ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3064     ierr = MatCreateIS(comm,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,mat_n);CHKERRQ(ierr);
3065   } else {
3066     /* it also destroys the local matrices */
3067     ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr);
3068   }
3069   ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr);
3070   ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr);
3071   ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr);
3072   ierr = MatSetUp(local_mat);CHKERRQ(ierr); /* WARNING -> no preallocation yet */
3073 
3074   /* set values */
3075   ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3076   ptr_vals = recv_buffer_vals;
3077   ptr_idxs = recv_buffer_idxs;
3078   for (i=0;i<n_recvs;i++) {
3079     if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
3080       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3081       ierr = MatSetValues(*mat_n,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr);
3082       ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3083       ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
3084       ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3085     }
3086     ptr_idxs += olengths_idxs[i];
3087     ptr_vals += olengths_vals[i];
3088   }
3089   ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3092   ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3093 
3094   { /* check */
3095     Vec       lvec,rvec;
3096     PetscReal infty_error;
3097 
3098     ierr = MatGetVecs(mat,&rvec,&lvec);CHKERRQ(ierr);
3099     ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr);
3100     ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr);
3101     ierr = VecScale(lvec,-1.0);CHKERRQ(ierr);
3102     ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr);
3103     ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3104     ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
3105     ierr = VecDestroy(&rvec);CHKERRQ(ierr);
3106     ierr = VecDestroy(&lvec);CHKERRQ(ierr);
3107   }
3108 
3109   /* free workspace */
3110   ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr);
3111   ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr);
3112   ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3113   ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr);
3114   ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
3115   if (isdense) {
3116     ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr);
3117     ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr);
3118   } else {
3119     /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */
3120   }
3121   ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr);
3122   ierr = PetscFree(recv_req_vals);CHKERRQ(ierr);
3123   ierr = PetscFree(send_req_idxs);CHKERRQ(ierr);
3124   ierr = PetscFree(send_req_vals);CHKERRQ(ierr);
3125   ierr = PetscFree(ilengths_vals);CHKERRQ(ierr);
3126   ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr);
3127   ierr = PetscFree(olengths_vals);CHKERRQ(ierr);
3128   ierr = PetscFree(olengths_idxs);CHKERRQ(ierr);
3129   ierr = PetscFree(onodes);CHKERRQ(ierr);
3130   PetscFunctionReturn(0);
3131 }
3132 
3133 #undef __FUNCT__
3134 #define __FUNCT__ "PCBDDCSetUpCoarseSolver"
3135 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
3136 {
3137   PC_BDDC                *pcbddc = (PC_BDDC*)pc->data;
3138   PC_IS                  *pcis = (PC_IS*)pc->data;
3139   Mat                    coarse_mat,coarse_mat_is,coarse_submat_dense;
3140   MatNullSpace           CoarseNullSpace=NULL;
3141   ISLocalToGlobalMapping coarse_islg;
3142   IS                     coarse_is;
3143   PetscInt               max_it;
3144   PetscInt               im_active=-1,active_procs=-1;
3145   PC                     pc_temp;
3146   PCType                 coarse_pc_type;
3147   KSPType                coarse_ksp_type;
3148   PetscBool              multilevel_requested,multilevel_allowed;
3149   PetscBool              setsym,issym,isherm,ispreonly,isbddc,isnn,coarse_reuse;
3150   MatStructure           matstruct;
3151   PetscErrorCode         ierr;
3152 
3153   PetscFunctionBegin;
3154   /* Assign global numbering to coarse dofs */
3155   if (pcbddc->new_primal_space) { /* a new primal space is present, so recompute global numbering */
3156     PetscInt ocoarse_size;
3157     ocoarse_size = pcbddc->coarse_size;
3158     ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr);
3159     ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr);
3160     /* see if we can avoid some work */
3161     if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
3162       if (ocoarse_size != pcbddc->coarse_size) { /* ...but with different size, so reset it and set reuse flag to false */
3163         ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr);
3164         coarse_reuse = PETSC_FALSE;
3165       } else { /* we can safely reuse already computed coarse matrix */
3166         coarse_reuse = PETSC_TRUE;
3167       }
3168     } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
3169       coarse_reuse = PETSC_FALSE;
3170     }
3171     /* reset any subassembling information */
3172     ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3173   } else { /* primal space has not been changed, so we can reuse coarse matrix */
3174     coarse_reuse = PETSC_TRUE;
3175   }
3176 
3177   /* test if we can go multilevel */
3178   multilevel_allowed = PETSC_FALSE;
3179   multilevel_requested = PETSC_FALSE;
3180   if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
3181   if (multilevel_requested) {
3182     /* count "active processes" */
3183     im_active = !!(pcis->n);
3184     ierr = MPI_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr);
3185     if (active_procs/pcbddc->coarsening_ratio < 2) {
3186       multilevel_allowed = PETSC_FALSE;
3187     } else {
3188       multilevel_allowed = PETSC_TRUE;
3189     }
3190   }
3191 
3192   /* set defaults for coarse KSP and PC */
3193   if (multilevel_allowed) {
3194     coarse_ksp_type = KSPRICHARDSON;
3195     coarse_pc_type = PCBDDC;
3196   } else {
3197     coarse_ksp_type = KSPPREONLY;
3198     coarse_pc_type = PCREDUNDANT;
3199   }
3200 
3201   /* create the coarse KSP object only once with defaults */
3202   if (!pcbddc->coarse_ksp) {
3203     char prefix[256],str_level[3];
3204     size_t len;
3205     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_ksp);CHKERRQ(ierr);
3206     ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr);
3207     ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr);
3208     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3209     ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3210     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3211     /* prefix */
3212     ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr);
3213     ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr);
3214     if (!pcbddc->current_level) {
3215       ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr);
3216       ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr);
3217     } else {
3218       ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr);
3219       if (pcbddc->current_level>1) len -= 2;
3220       ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len);CHKERRQ(ierr);
3221       *(prefix+len)='\0';
3222       sprintf(str_level,"%d_",(int)(pcbddc->current_level));
3223       ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr);
3224     }
3225     ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr);
3226   }
3227   /* allow user customization */
3228   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s before setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3229   ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr);
3230   /* ierr = PetscPrintf(PETSC_COMM_WORLD,"Type of %s after setting from options %s\n",((PetscObject)pcbddc->coarse_ksp)->prefix,((PetscObject)pcbddc->coarse_ksp)->type_name);CHKERRQ(ierr); */
3231 
3232   /* get some info after set from options */
3233   ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr);
3234   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr);
3235   ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr);
3236   if (isbddc && !multilevel_allowed) { /* prevent from infinite loop if user as requested bddc pc for coarse solver */
3237     ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr);
3238     ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr);
3239     isbddc = PETSC_FALSE;
3240   }
3241 
3242   /* creates temporary l2gmap and IS for coarse indexes */
3243   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr);
3244   ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr);
3245 
3246   /* propagate BDDC info to the next level */
3247   ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr);
3248   ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3249   ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr);
3250   if (isbddc) { /* protects from unneded computations */
3251     PetscInt               *tidxs,*tidxs2,nout,tsize,i;
3252     const PetscInt         *idxs;
3253     IS                     *c_isfordofs,c_isneumann;
3254     ISLocalToGlobalMapping tmap;
3255 
3256     /* create map between primal indices (in local representative ordering) and local subdomain numbering */
3257     ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr);
3258     /* allocate space for temporary storage */
3259     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs);CHKERRQ(ierr);
3260     ierr = PetscMalloc(pcbddc->local_primal_size*sizeof(PetscInt),&tidxs2);CHKERRQ(ierr);
3261     /* dofs splitting */
3262     ierr = PetscMalloc(pcbddc->n_ISForDofsLocal*sizeof(IS),&c_isfordofs);CHKERRQ(ierr);
3263     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3264       /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */
3265       ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr);
3266       ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3267       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3268       ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr);
3269       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3270       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->ISForDofsLocal[i]),nout,tidxs2,PETSC_COPY_VALUES,&c_isfordofs[i]);CHKERRQ(ierr);
3271       /* ierr = ISView(c_isfordofs[i],0);CHKERRQ(ierr); */
3272     }
3273     ierr = PCBDDCSetDofsSplitting(pc_temp,pcbddc->n_ISForDofsLocal,c_isfordofs);CHKERRQ(ierr);
3274     for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
3275       ierr = ISDestroy(&c_isfordofs[i]);CHKERRQ(ierr);
3276     }
3277     ierr = PetscFree(c_isfordofs);CHKERRQ(ierr);
3278     /* neumann boundaries */
3279     if (pcbddc->NeumannBoundariesLocal) {
3280       /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */
3281       ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr);
3282       ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3283       ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr);
3284       ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr);
3285       ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr);
3286       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pcbddc->NeumannBoundariesLocal),nout,tidxs2,PETSC_COPY_VALUES,&c_isneumann);CHKERRQ(ierr);
3287       /* ierr = ISView(c_isneumann,0);CHKERRQ(ierr); */
3288       ierr = PCBDDCSetNeumannBoundaries(pc_temp,c_isneumann);CHKERRQ(ierr);
3289     }
3290     ierr = ISDestroy(&c_isneumann);CHKERRQ(ierr);
3291     ierr = PetscFree(tidxs);CHKERRQ(ierr);
3292     ierr = PetscFree(tidxs2);CHKERRQ(ierr);
3293     ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr);
3294   }
3295 
3296   /* creates temporary MATIS object for coarse matrix */
3297   ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);CHKERRQ(ierr);
3298   ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,&coarse_mat_is);CHKERRQ(ierr);
3299   ierr = MatISSetLocalMat(coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr);
3300   ierr = MatAssemblyBegin(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3301   ierr = MatAssemblyEnd(coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3302   ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr);
3303   ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr);
3304 
3305   /* assemble coarse matrix */
3306   if (isbddc || isnn) {
3307     if (!pcbddc->coarse_subassembling) { /* subassembling info is not present */
3308       ierr = MatISGetSubassemblingPattern(coarse_mat_is,pcbddc->coarsening_ratio,&pcbddc->coarse_subassembling);CHKERRQ(ierr);
3309       if (pcbddc->dbg_flag) {
3310         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3311         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Subassembling pattern\n");CHKERRQ(ierr);
3312         ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr);
3313       }
3314     }
3315     if (coarse_reuse) {
3316       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3317       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3318       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3319     } else {
3320       ierr = MatISSubassemble(coarse_mat_is,pcbddc->coarse_subassembling,pcbddc->coarsening_ratio,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3321     }
3322   } else {
3323     if (coarse_reuse) {
3324       ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL,NULL);CHKERRQ(ierr);
3325       ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr);
3326       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_REUSE_MATRIX,&coarse_mat);CHKERRQ(ierr);
3327     } else {
3328       ierr = MatISGetMPIXAIJ(coarse_mat_is,MATMPIAIJ,MAT_INITIAL_MATRIX,&coarse_mat);CHKERRQ(ierr);
3329     }
3330   }
3331   ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr);
3332 
3333   /* create local to global scatters for coarse problem */
3334   if (pcbddc->new_primal_space) {
3335     ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr);
3336     ierr = VecDestroy(&pcbddc->coarse_rhs);CHKERRQ(ierr);
3337     ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3338     ierr = MatGetVecs(coarse_mat,&pcbddc->coarse_vec,&pcbddc->coarse_rhs);CHKERRQ(ierr);
3339     ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr);
3340   }
3341   ierr = ISDestroy(&coarse_is);CHKERRQ(ierr);
3342 
3343   /* propagate symmetry info to coarse matrix */
3344   issym = PETSC_FALSE;
3345   ierr = MatIsSymmetricKnown(pc->pmat,&setsym,&issym);CHKERRQ(ierr);
3346   ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,issym);CHKERRQ(ierr);
3347   isherm = PETSC_FALSE;
3348   ierr = MatIsHermitianKnown(pc->pmat,&setsym,&isherm);CHKERRQ(ierr);
3349   ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
3350 
3351   /* Compute coarse null space (special handling by BDDC only) */
3352   if (pcbddc->NullSpace) {
3353     ierr = PCBDDCNullSpaceAssembleCoarse(pc,coarse_mat,&CoarseNullSpace);CHKERRQ(ierr);
3354     if (isbddc) {
3355       ierr = PCBDDCSetNullSpace(pc_temp,CoarseNullSpace);CHKERRQ(ierr);
3356     } else {
3357       ierr = KSPSetNullSpace(pcbddc->coarse_ksp,CoarseNullSpace);CHKERRQ(ierr);
3358     }
3359   }
3360 
3361   /* set operators */
3362   ierr = PCGetOperators(pc,NULL,NULL,&matstruct);CHKERRQ(ierr);
3363   ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat,matstruct);CHKERRQ(ierr);
3364 
3365   /* additional KSP customization */
3366   ierr = KSPGetTolerances(pcbddc->coarse_ksp,NULL,NULL,NULL,&max_it);CHKERRQ(ierr);
3367   if (max_it < 5) {
3368     ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr);
3369   }
3370 
3371   /* print some info if requested */
3372   if (pcbddc->dbg_flag) {
3373     ierr = KSPGetType(pcbddc->coarse_ksp,&coarse_ksp_type);CHKERRQ(ierr);
3374     ierr = PCGetType(pc_temp,&coarse_pc_type);CHKERRQ(ierr);
3375     if (!multilevel_allowed) {
3376       if (multilevel_requested) {
3377         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr);
3378       } else if (pcbddc->max_levels) {
3379         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr);
3380       }
3381     }
3382     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Calling %s/%s setup at level %d for coarse solver (%s)\n",coarse_ksp_type,coarse_pc_type,pcbddc->current_level,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr);
3383     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3384   }
3385 
3386   /* setup coarse ksp */
3387   ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr);
3388 
3389   /* Check coarse problem if in debug mode or if solving with an iterative method */
3390   ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr);
3391   if (pcbddc->dbg_flag || !ispreonly) {
3392     KSP       check_ksp;
3393     KSPType   check_ksp_type;
3394     PC        check_pc;
3395     Vec       check_vec;
3396     PetscReal abs_infty_error,infty_error,lambda_min,lambda_max;
3397     PetscInt  its;
3398     PetscBool compute;
3399 
3400     /* Create ksp object suitable for estimation of extreme eigenvalues */
3401     ierr = KSPCreate(PetscObjectComm((PetscObject)pc),&check_ksp);CHKERRQ(ierr);
3402     ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat,SAME_PRECONDITIONER);CHKERRQ(ierr);
3403     ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr);
3404     if (ispreonly) {
3405       check_ksp_type = KSPPREONLY;
3406       compute = PETSC_FALSE;
3407     } else {
3408       check_ksp_type = KSPGMRES;
3409       compute = PETSC_TRUE;
3410     }
3411     ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr);
3412     ierr = KSPSetComputeSingularValues(check_ksp,compute);CHKERRQ(ierr);
3413     ierr = KSPSetUp(check_ksp);CHKERRQ(ierr);
3414     ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr);
3415     ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr);
3416     /* create random vec */
3417     ierr = VecDuplicate(pcbddc->coarse_vec,&check_vec);CHKERRQ(ierr);
3418     ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr);
3419     if (CoarseNullSpace) {
3420       ierr = MatNullSpaceRemove(CoarseNullSpace,check_vec);CHKERRQ(ierr);
3421     }
3422     ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3423     /* solve coarse problem */
3424     ierr = KSPSolve(check_ksp,pcbddc->coarse_rhs,pcbddc->coarse_vec);CHKERRQ(ierr);
3425     if (CoarseNullSpace) {
3426       ierr = MatNullSpaceRemove(CoarseNullSpace,pcbddc->coarse_vec);CHKERRQ(ierr);
3427     }
3428     /* set eigenvalue estimation if preonly has not been requested */
3429     if (!ispreonly) {
3430       ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max,&lambda_min);CHKERRQ(ierr);
3431       /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Coarse problem eigenvalues: %1.6e %1.6e\n",lambda_min,lambda_max);CHKERRQ(ierr); */
3432       ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max,lambda_min);CHKERRQ(ierr);
3433       ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr);
3434     }
3435     /* check coarse problem residual error */
3436     if (pcbddc->dbg_flag) {
3437       ierr = VecAXPY(check_vec,-1.0,pcbddc->coarse_vec);CHKERRQ(ierr);
3438       ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr);
3439       ierr = MatMult(coarse_mat,check_vec,pcbddc->coarse_rhs);CHKERRQ(ierr);
3440       ierr = VecNorm(pcbddc->coarse_rhs,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr);
3441       ierr = VecDestroy(&check_vec);CHKERRQ(ierr);
3442       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem (%s) details\n",((PetscObject)(pcbddc->coarse_ksp))->prefix);CHKERRQ(ierr);
3443       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem exact infty_error   : %1.6e\n",infty_error);CHKERRQ(ierr);
3444       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr);
3445       if (!ispreonly) {
3446         ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr);
3447         ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s): %1.6e %1.6e\n",its,check_ksp_type,lambda_min,lambda_max);CHKERRQ(ierr);
3448       }
3449       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3450     }
3451     ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr);
3452   }
3453 
3454   /* print additional info */
3455   if (pcbddc->dbg_flag) {
3456     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr);
3457     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3458     ierr = KSPView(pcbddc->coarse_ksp,pcbddc->dbg_viewer);CHKERRQ(ierr);
3459     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3460   }
3461 
3462   /* free memory */
3463   ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr);
3464   ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 #undef __FUNCT__
3469 #define __FUNCT__ "PCBDDCComputePrimalNumbering"
3470 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
3471 {
3472   PC_BDDC*       pcbddc = (PC_BDDC*)pc->data;
3473   PC_IS*         pcis = (PC_IS*)pc->data;
3474   Mat_IS*        matis = (Mat_IS*)pc->pmat->data;
3475   PetscInt       i,coarse_size;
3476   PetscInt       *local_primal_indices;
3477   PetscErrorCode ierr;
3478 
3479   PetscFunctionBegin;
3480   /* Compute global number of coarse dofs */
3481   if (!pcbddc->primal_indices_local_idxs && pcbddc->local_primal_size) {
3482     SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Local primal indices have not been created");
3483   }
3484   ierr = PCBDDCSubsetNumbering(PetscObjectComm((PetscObject)(pc->pmat)),matis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,NULL,&coarse_size,&local_primal_indices);CHKERRQ(ierr);
3485 
3486   /* check numbering */
3487   if (pcbddc->dbg_flag) {
3488     PetscScalar coarsesum,*array;
3489 
3490     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3491     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr);
3492     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr);
3493     ierr = PetscViewerASCIISynchronizedAllow(pcbddc->dbg_viewer,PETSC_TRUE);CHKERRQ(ierr);
3494     ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr);
3495     for (i=0;i<pcbddc->local_primal_size;i++) {
3496       ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr);
3497     }
3498     ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr);
3499     ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr);
3500     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3501     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3502     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3503     ierr = VecScatterBegin(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3504     ierr = VecScatterEnd(matis->ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3505     ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3506     for (i=0;i<pcis->n;i++) {
3507       if (array[i] == 1.0) {
3508         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d owned by a single process!\n",PetscGlobalRank,i);CHKERRQ(ierr);
3509       }
3510     }
3511     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3512     for (i=0;i<pcis->n;i++) {
3513       if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
3514     }
3515     ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr);
3516     ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3517     ierr = VecScatterBegin(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3518     ierr = VecScatterEnd(matis->ctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3519     ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr);
3520     ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr);
3521     if (pcbddc->dbg_flag > 1) {
3522       ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr);
3523       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3524       ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr);
3525       for (i=0;i<pcbddc->local_primal_size;i++) {
3526         ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i]);
3527       }
3528       ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3529     }
3530     ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr);
3531   }
3532   /* get back data */
3533   *coarse_size_n = coarse_size;
3534   *local_primal_indices_n = local_primal_indices;
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "PCBDDCGlobalToLocal"
3540 PetscErrorCode PCBDDCGlobalToLocal(PC pc,VecScatter ctx,IS globalis,IS* localis)
3541 {
3542   PC_IS*         pcis = (PC_IS*)pc->data;
3543   IS             localis_t;
3544   PetscInt       i,lsize,*idxs;
3545   PetscScalar    *vals;
3546   PetscErrorCode ierr;
3547 
3548   PetscFunctionBegin;
3549   /* get dirichlet indices in local ordering exploiting local to global map */
3550   ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr);
3551   ierr = PetscMalloc(lsize*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3552   for (i=0;i<lsize;i++) vals[i] = 1.0;
3553   ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3554   ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr);
3555   if (idxs) { /* multilevel guard */
3556     ierr = VecSetValues(pcis->vec1_global,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr);
3557   }
3558   ierr = VecAssemblyBegin(pcis->vec1_global);CHKERRQ(ierr);
3559   ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr);
3560   ierr = PetscFree(vals);CHKERRQ(ierr);
3561   ierr = VecAssemblyEnd(pcis->vec1_global);CHKERRQ(ierr);
3562   /* now compute dirichlet set in local ordering */
3563   ierr = VecScatterBegin(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3564   ierr = VecScatterEnd(ctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3565   ierr = VecGetArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3566   for (i=0,lsize=0;i<pcis->n;i++) {
3567     if (vals[i] == 1.0) {
3568       lsize++;
3569     }
3570   }
3571   ierr = PetscMalloc(lsize*sizeof(PetscInt),&idxs);CHKERRQ(ierr);
3572   for (i=0,lsize=0;i<pcis->n;i++) {
3573     if (vals[i] == 1.0) {
3574       idxs[lsize++] = i;
3575     }
3576   }
3577   ierr = VecRestoreArrayRead(pcis->vec1_N,(const PetscScalar**)&vals);CHKERRQ(ierr);
3578   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr);
3579   *localis = localis_t;
3580   PetscFunctionReturn(0);
3581 }
3582